From aa3ee206f8927ad25a85813cb2bbd2bd222c776a Mon Sep 17 00:00:00 2001 From: Gavin Huttley Date: Tue, 3 Sep 2024 08:17:23 +1000 Subject: [PATCH 1/2] DEV: renamed project to ensembl-tui because pypi disallowed ensembl_lite --- .github/workflows/release.yml | 6 +- .github/workflows/testing_develop.yml | 2 +- README.md | 44 +++++------ pyproject.toml | 17 ++--- src/{ensembl_lite => ensembl_tui}/__init__.py | 0 src/{ensembl_lite => ensembl_tui}/_align.py | 12 +-- src/{ensembl_lite => ensembl_tui}/_config.py | 13 ++-- .../_download.py | 10 +-- src/{ensembl_lite => ensembl_tui}/_emf.py | 6 +- .../_faster_fasta.py | 0 .../_ftp_download.py | 4 +- src/{ensembl_lite => ensembl_tui}/_genome.py | 74 +++++++++---------- .../_homology.py | 12 +-- src/{ensembl_lite => ensembl_tui}/_install.py | 21 +++--- src/{ensembl_lite => ensembl_tui}/_maf.py | 6 +- src/{ensembl_lite => ensembl_tui}/_name.py | 3 +- .../_site_map.py | 0 src/{ensembl_lite => ensembl_tui}/_species.py | 6 +- .../_storage_mixin.py | 2 +- src/{ensembl_lite => ensembl_tui}/_util.py | 14 ++-- src/{ensembl_lite => ensembl_tui}/cli.py | 22 +++--- .../data/__init__.py | 0 .../data/sample.cfg | 0 .../data/species.tsv | 0 tests/conftest.py | 5 +- tests/test_align.py | 25 ++++--- tests/test_cli.py | 7 +- tests/test_config.py | 7 +- tests/test_dbs.py | 11 +-- tests/test_emf.py | 5 +- tests/test_genome.py | 16 ++-- tests/test_homology.py | 11 +-- tests/test_installed.py | 7 +- tests/test_maf.py | 3 +- tests/test_name.py | 3 +- tests/test_site_map.py | 3 +- tests/test_species.py | 3 +- tests/test_util.py | 15 ++-- 38 files changed, 205 insertions(+), 190 deletions(-) rename src/{ensembl_lite => ensembl_tui}/__init__.py (100%) rename src/{ensembl_lite => ensembl_tui}/_align.py (98%) rename src/{ensembl_lite => ensembl_tui}/_config.py (96%) rename src/{ensembl_lite => ensembl_tui}/_download.py (97%) rename src/{ensembl_lite => ensembl_tui}/_emf.py (94%) rename src/{ensembl_lite => ensembl_tui}/_faster_fasta.py (100%) rename src/{ensembl_lite => ensembl_tui}/_ftp_download.py (97%) rename src/{ensembl_lite => ensembl_tui}/_genome.py (96%) rename src/{ensembl_lite => ensembl_tui}/_homology.py (98%) rename src/{ensembl_lite => ensembl_tui}/_install.py (93%) rename src/{ensembl_lite => ensembl_tui}/_maf.py (96%) rename src/{ensembl_lite => ensembl_tui}/_name.py (98%) rename src/{ensembl_lite => ensembl_tui}/_site_map.py (100%) rename src/{ensembl_lite => ensembl_tui}/_species.py (98%) rename src/{ensembl_lite => ensembl_tui}/_storage_mixin.py (99%) rename src/{ensembl_lite => ensembl_tui}/_util.py (97%) rename src/{ensembl_lite => ensembl_tui}/cli.py (97%) rename src/{ensembl_lite => ensembl_tui}/data/__init__.py (100%) rename src/{ensembl_lite => ensembl_tui}/data/sample.cfg (100%) rename src/{ensembl_lite => ensembl_tui}/data/species.tsv (100%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4bc7c9f..bb331b8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -52,7 +52,7 @@ jobs: - name: Upload sdist and wheel uses: actions/upload-artifact@v4 with: - name: elt-wheel-sdist + name: eti-wheel-sdist path: | ./dist/*.whl ./dist/*.tar.gz @@ -69,7 +69,7 @@ jobs: - name: Download sdist and wheel uses: actions/download-artifact@v4 with: - name: elt-wheel-sdist + name: eti-wheel-sdist path: ./dist - name: Publish package distributions to Test PyPI @@ -89,7 +89,7 @@ jobs: - name: Download sdist and wheel uses: actions/download-artifact@v4 with: - name: elt-wheel-sdist + name: eti-wheel-sdist path: ./dist - name: Publish package distributions to PyPI diff --git a/.github/workflows/testing_develop.yml b/.github/workflows/testing_develop.yml index 41fa4fb..901d5c4 100644 --- a/.github/workflows/testing_develop.yml +++ b/.github/workflows/testing_develop.yml @@ -35,7 +35,7 @@ jobs: pip install --upgrade nox - name: "Run nox for ${{ matrix.python-version }}" - run: "nox -s test-${{ matrix.python-version }} -- --cov-report lcov:lcov-${{matrix.os}}-${{matrix.python-version}}.lcov --cov-report term --cov-append --cov ensembl_lite" + run: "nox -s test-${{ matrix.python-version }} -- --cov-report lcov:lcov-${{matrix.os}}-${{matrix.python-version}}.lcov --cov-report term --cov-append --cov ensembl_tui" - name: Coveralls Parallel uses: coverallsapp/github-action@v2 diff --git a/README.md b/README.md index 9068a18..e6f4f0d 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,12 @@ [![CodeQL](https://github.com/cogent3/EnsemblLite/actions/workflows/codeql.yml/badge.svg)](https://github.com/cogent3/EnsemblLite/actions/workflows/codeql.yml) [![Coverage Status](https://coveralls.io/repos/github/cogent3/EnsemblLite/badge.svg?branch=develop)](https://coveralls.io/github/cogent3/EnsemblLite?branch=develop) -# EnsemblLite +# ensembl-tui -EnsemblLite provides the `elt` command line application for obtaining a subset of the data provided by Ensembl which can then be queried locally. You can have multiple such subsets on your machine, each corresponding to a different selection of species and data types. +ensembl-tui provides the `eti` command line application for obtaining a subset of the data provided by Ensembl which can then be queried locally. You can have multiple such subsets on your machine, each corresponding to a different selection of species and data types. > **Warning** -> EnsemblLite is in a preliminary phase of development with a limited feature set and incomplete test coverage! Please validate results against the web version. If you discover errors, please post a [bug report](https://github.com/cogent3/EnsemblLite/issues). +> ensembl-tui is in a preliminary phase of development with a limited feature set and incomplete test coverage! Please validate results against the web version. If you discover errors, please post a [bug report](https://github.com/cogent3/EnsemblLite/issues). ## Installing the software @@ -47,17 +47,17 @@ Some commands can be run in parallel but have moderate memory requirements. If y ``` - Usage: elt exportrc [OPTIONS] + Usage: eti exportrc [OPTIONS] exports sample config and species table to the nominated path @@ -69,7 +69,7 @@ Some commands can be run in parallel but have moderate memory requirements. If y ```shell - $ elt exportrc -o ~/Desktop/Outbox/ensembl_download + $ eti exportrc -o ~/Desktop/Outbox/ensembl_download ``` This command creates a `ensembl_download` download directory and writes two plain text files into it: @@ -85,17 +85,17 @@ Some commands can be run in parallel but have moderate memory requirements. If y ``` - Usage: elt download [OPTIONS] + Usage: eti download [OPTIONS] download data from Ensembl's ftp site @@ -114,7 +114,7 @@ Some commands can be run in parallel but have moderate memory requirements. If y ```shell $ cd to/directory/with/config.cfg - $ elt download -c config.cfg + $ eti download -c config.cfg ``` > **Note** @@ -129,17 +129,17 @@ The download creates a new `.cfg` file inside the download directory. This file ``` - Usage: elt install [OPTIONS] + Usage: eti install [OPTIONS] create the local representations of the data @@ -158,7 +158,7 @@ The following command uses 2 CPUs and has been safe on systems with only 16GB of ```shell $ cd to/directory/with/downloaded_data -$ elt install -d downloaded_data -np 2 +$ eti install -d downloaded_data -np 2 ``` @@ -168,17 +168,17 @@ $ elt install -d downloaded_data -np 2 ``` - Usage: elt installed [OPTIONS] + Usage: eti installed [OPTIONS] show what is installed @@ -198,21 +198,21 @@ We provide a conventional command line interface for querying the data with subc
The full list of subcommands - You can get help on individual subcommands by running `elt ` in the terminal. + You can get help on individual subcommands by running `eti ` in the terminal. ``` - Usage: elt [OPTIONS] COMMAND [ARGS]... + Usage: eti [OPTIONS] COMMAND [ARGS]... Tools for obtaining and interrogating subsets of https://ensembl.org genomic data. diff --git a/pyproject.toml b/pyproject.toml index fd547e4..bd8df0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" [project] -name = "ensembl_lite" +name = "ensembl_tui" authors = [ { name = "Gavin Huttley", email = "Gavin.Huttley@anu.edu.au"}, ] @@ -45,7 +45,7 @@ Documentation = "https://github.com/cogent3/EnsemblLite" "Source Code" = "https://github.com/cogent3/EnsemblLite" [project.scripts] -elt = "ensembl_lite.cli:main" +eti = "ensembl_tui.cli:main" [project.optional-dependencies] test = [ @@ -53,7 +53,7 @@ test = [ "pytest", "pytest-cov", "pytest-xdist", - "ruff==0.5.7", + "ruff==0.6.3", "nox"] doc = ["click==8.1.3", "sphinx", @@ -85,7 +85,7 @@ dev = ["click", "pytest", "pytest-cov", "pytest-xdist", - "ruff==0.5.7", + "ruff==0.6.3", "scriv", "sphinx", "sphinx-autobuild", @@ -94,7 +94,7 @@ dev = ["click", "sphinxcontrib-bibtex"] [tool.flit.sdist] -include = ["src/*", "tests/", "pyproject.toml"] +include = ["src/*", "tests/*", "pyproject.toml"] [tool.pytest.ini_options] markers = [ @@ -107,7 +107,6 @@ testpaths = "tests" [tool.ruff] exclude = [ - ".bzr", ".direnv", ".eggs", ".git", @@ -127,10 +126,8 @@ exclude = [ ".vscode", "__pypackages__", "_build", - "buck-out", "build", "dist", - "node_modules", "site-packages", "venv", ] @@ -139,7 +136,7 @@ exclude = [ line-length = 88 indent-width = 4 -target-version = "py39" +target-version = "py310" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. @@ -196,7 +193,7 @@ docstring-code-line-length = "dynamic" format="md" categories=["Contributors", "ENH", "BUG", "DOC", "Deprecations", "Discontinued"] output_file="changelog.md" -version="literal: src/cogent3/__init__.py:__version__" +version="literal: src/ensembl_tui/__init__.py:__version__" skip_fragments="README.*" new_fragment_template="file: changelog.d/templates/new.md.j2" entry_title_template="file: changelog.d/templates/title.md.j2" diff --git a/src/ensembl_lite/__init__.py b/src/ensembl_tui/__init__.py similarity index 100% rename from src/ensembl_lite/__init__.py rename to src/ensembl_tui/__init__.py diff --git a/src/ensembl_lite/_align.py b/src/ensembl_tui/_align.py similarity index 98% rename from src/ensembl_lite/_align.py rename to src/ensembl_tui/_align.py index 8830b18..b07306b 100644 --- a/src/ensembl_lite/_align.py +++ b/src/ensembl_tui/_align.py @@ -9,9 +9,9 @@ from cogent3.core.alignment import Aligned, Alignment from cogent3.core.location import _DEFAULT_GAP_DTYPE, IndelMap -from ensembl_lite import _genome as elt_genome -from ensembl_lite import _storage_mixin as elt_mixin -from ensembl_lite import _util as elt_util +from ensembl_tui import _genome as elt_genome +from ensembl_tui import _storage_mixin as elt_mixin +from ensembl_tui import _util as elt_util _no_gaps = numpy.array([], dtype=_DEFAULT_GAP_DTYPE) @@ -81,7 +81,7 @@ class GapStore(elt_mixin.Hdf5Mixin): def __init__( self, source: elt_util.PathType, - align_name: typing.Optional[str] = None, + align_name: str | None = None, mode: str = "r", in_memory: bool = False, ): @@ -402,7 +402,7 @@ def _add_alignments(*alns, sep="?") -> Alignment: all_names.update(set(aln.names)) result = {n: [] for n in all_names} - for aln, default in zip(alns, defaults): + for aln, default in zip(alns, defaults, strict=False): data = aln.to_dict() for name in all_names: result[name].append(data.get(name, default)) @@ -423,7 +423,7 @@ def __init__( self, align_db: AlignDb, genomes: dict[str, elt_genome.Genome], - mask_features: typing.Optional[list[str]] = None, + mask_features: list[str] | None = None, sep: str = "?", ) -> None: self._align_db = align_db diff --git a/src/ensembl_lite/_config.py b/src/ensembl_tui/_config.py similarity index 96% rename from src/ensembl_lite/_config.py rename to src/ensembl_tui/_config.py index 8f1e854..4215f5a 100644 --- a/src/ensembl_lite/_config.py +++ b/src/ensembl_tui/_config.py @@ -1,14 +1,13 @@ import configparser import fnmatch import pathlib -import typing from collections.abc import Iterable from dataclasses import dataclass import click -from ensembl_lite import _species as elt_species -from ensembl_lite import _util as elt_util +from ensembl_tui import _species as elt_species +from ensembl_tui import _util as elt_util INSTALLED_CONFIG_NAME = "installed.cfg" DOWNLOADED_CONFIG_NAME = "downloaded.cfg" @@ -25,7 +24,9 @@ def make_relative_to( ) -> pathlib.Path: assert staging_path.is_absolute() and install_path.is_absolute() - for i, (s_part, i_part) in enumerate(zip(staging_path.parts, install_path.parts)): + for i, (s_part, i_part) in enumerate( + zip(staging_path.parts, install_path.parts, strict=False), + ): if s_part != i_part: break change_up = ("..",) * (len(staging_path.parts) - i) @@ -243,11 +244,11 @@ def _standardise_path( def read_config( config_path: pathlib.Path, - root_dir: typing.Optional[pathlib.Path] = None, + root_dir: pathlib.Path | None = None, ) -> Config: """returns ensembl release, local path, and db specifics from the provided config path""" - from ensembl_lite._download import download_ensembl_tree + from ensembl_tui._download import download_ensembl_tree if not config_path.exists(): click.secho(f"File not found {config_path.resolve()!s}", fg="red") diff --git a/src/ensembl_lite/_download.py b/src/ensembl_tui/_download.py similarity index 97% rename from src/ensembl_lite/_download.py rename to src/ensembl_tui/_download.py index 919b399..c7de359 100644 --- a/src/ensembl_lite/_download.py +++ b/src/ensembl_tui/_download.py @@ -6,11 +6,11 @@ from cogent3 import load_tree from rich.progress import Progress -from ensembl_lite import _config as elt_config -from ensembl_lite import _ftp_download as elt_ftp -from ensembl_lite import _site_map as elt_site_map -from ensembl_lite import _species as elt_species -from ensembl_lite import _util as elt_util +from ensembl_tui import _config as elt_config +from ensembl_tui import _ftp_download as elt_ftp +from ensembl_tui import _site_map as elt_site_map +from ensembl_tui import _species as elt_species +from ensembl_tui import _util as elt_util _cfg = elt_util.get_resource_path("sample.cfg") diff --git a/src/ensembl_lite/_emf.py b/src/ensembl_tui/_emf.py similarity index 94% rename from src/ensembl_lite/_emf.py rename to src/ensembl_tui/_emf.py index 13c3e40..25f92bc 100644 --- a/src/ensembl_lite/_emf.py +++ b/src/ensembl_tui/_emf.py @@ -5,8 +5,8 @@ from cogent3 import open_ -from ensembl_lite import _name as elt_name -from ensembl_lite import _util as elt_util +from ensembl_tui import _name as elt_name +from ensembl_tui import _util as elt_util # TODO spaces are optional between columns representing SEQ and SCORE lines @@ -28,7 +28,7 @@ def _get_block_seqnames(data) -> dict[str, str]: # they also include ancestral sequences, which exclude return { n: "".join(s) - for n, *s in zip(names, *seq_data) + for n, *s in zip(names, *seq_data, strict=False) if n.species != "ancestral_sequences" } diff --git a/src/ensembl_lite/_faster_fasta.py b/src/ensembl_tui/_faster_fasta.py similarity index 100% rename from src/ensembl_lite/_faster_fasta.py rename to src/ensembl_tui/_faster_fasta.py diff --git a/src/ensembl_lite/_ftp_download.py b/src/ensembl_tui/_ftp_download.py similarity index 97% rename from src/ensembl_lite/_ftp_download.py rename to src/ensembl_tui/_ftp_download.py index 57047c2..30a59e2 100644 --- a/src/ensembl_lite/_ftp_download.py +++ b/src/ensembl_tui/_ftp_download.py @@ -1,11 +1,11 @@ import pathlib +from collections.abc import Callable from ftplib import FTP -from typing import Callable from rich.progress import Progress, track from unsync import unsync -from ensembl_lite import _util as elt_util +from ensembl_tui import _util as elt_util def configured_ftp(host: str = "ftp.ensembl.org") -> FTP: diff --git a/src/ensembl_lite/_genome.py b/src/ensembl_tui/_genome.py similarity index 96% rename from src/ensembl_lite/_genome.py rename to src/ensembl_tui/_genome.py index d3e0ea3..2919d51 100644 --- a/src/ensembl_lite/_genome.py +++ b/src/ensembl_tui/_genome.py @@ -7,7 +7,7 @@ import sqlite3 import typing from abc import ABC, abstractmethod -from typing import Any, Optional +from typing import Any import click import h5py @@ -28,11 +28,11 @@ from cogent3.util.table import Table from numpy.typing import NDArray -from ensembl_lite import _config as elt_config -from ensembl_lite import _species as elt_species -from ensembl_lite import _storage_mixin as elt_mixin -from ensembl_lite import _util as elt_util -from ensembl_lite._faster_fasta import quicka_parser +from ensembl_tui import _config as elt_config +from ensembl_tui import _species as elt_species +from ensembl_tui import _storage_mixin as elt_mixin +from ensembl_tui import _util as elt_util +from ensembl_tui._faster_fasta import quicka_parser SEQ_STORE_NAME = "genome.seqs-hdf5_blosc2" ANNOT_STORE_NAME = "genome.annots-sqlitedb" @@ -84,7 +84,7 @@ class EnsemblGffRecord(GffRecord): __slots__ = GffRecord.__slots__ + ("feature_id", "_is_updated") - def __init__(self, feature_id: Optional[int] = None, **kwargs): + def __init__(self, feature_id: int | None = None, **kwargs): is_canonical = kwargs.pop("is_canonical", None) symbol = kwargs.pop("symbol", None) descr = kwargs.pop("description", None) @@ -288,7 +288,7 @@ class EnsemblGffDb(elt_mixin.SqliteDbMixin): def __init__( self, source: elt_util.PathType = ":memory:", - db: typing.Optional[DbTypes] = None, + db: DbTypes | None = None, ): self.source = source if isinstance(db, self.__class__): @@ -377,7 +377,7 @@ def _build_feature(self, kwargs) -> EnsemblGffRecord: def add_feature( self, *, - feature: typing.Optional[EnsemblGffRecord] = None, + feature: EnsemblGffRecord | None = None, **kwargs, ) -> None: """updates the feature_id attribute""" @@ -466,7 +466,7 @@ def get_features_matching( columns=columns, **query_args, ): - result = dict(zip(columns, result)) + result = dict(zip(columns, result, strict=False)) result["spans"] = [ tuple(c) for c in elt_mixin.blob_to_array(result["spans"]) ] @@ -477,7 +477,7 @@ def get_feature_children( *, name: str, **kwargs, - ) -> typing.List[FeatureDataType]: + ) -> list[FeatureDataType]: cols = "seqid", "biotype", "spans", "strand", "name" results = {} for result in self._get_records_matching( @@ -486,7 +486,7 @@ def get_feature_children( parent_stableid=name, **kwargs, ): - result = dict(zip(cols, result)) + result = dict(zip(cols, result, strict=False)) result["spans"] = [ tuple(c) for c in elt_mixin.blob_to_array(result["spans"]) ] @@ -498,7 +498,7 @@ def get_feature_parent( *, name: str, **kwargs, - ) -> typing.List[FeatureDataType]: + ) -> list[FeatureDataType]: cols = "seqid", "biotype", "spans", "strand", "name" results = {} for result in self._get_records_matching( @@ -506,7 +506,7 @@ def get_feature_parent( columns=cols, child_stableid=name, ): - result = dict(zip(cols, result)) + result = dict(zip(cols, result, strict=False)) result["spans"] = [ tuple(c) for c in elt_mixin.blob_to_array(result["spans"]) ] @@ -533,7 +533,7 @@ def get_records_matching( for result in self._execute_sql(sql, values=vals): if cols is None: cols = result.keys() - result = dict(zip(cols, result)) + result = dict(zip(cols, result, strict=False)) result["spans"] = [ tuple(c) for c in elt_mixin.blob_to_array(result["spans"]) ] @@ -576,7 +576,7 @@ def subset( for r in self._get_records_matching(table_name="gff", **kwargs): if cols is None: cols = r.keys() - r = dict(zip(cols, r)) + r = dict(zip(cols, r, strict=False)) feature_id = r.pop("feature_id") feature = EnsemblGffRecord(**r) feature_ids[feature_id] = feature @@ -707,7 +707,7 @@ class SeqsDataABC(ABC): species: str mode: str # as per standard file opening modes, r, w, a _is_open = False - _file: Optional[Any] = None + _file: Any | None = None @abstractmethod def __hash__(self): ... @@ -723,8 +723,8 @@ def get_seq_str( self, *, seqid: str, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ) -> str: ... @abstractmethod @@ -732,8 +732,8 @@ def get_seq_arr( self, *, seqid: str, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ) -> NDArray[numpy.uint8]: ... @abstractmethod @@ -794,7 +794,7 @@ class SeqsDataHdf5(elt_mixin.Hdf5Mixin, SeqsDataABC): def __init__( self, source: elt_util.PathType, - species: Optional[str] = None, + species: str | None = None, mode: str = "r", in_memory: bool = False, ): @@ -870,8 +870,8 @@ def get_seq_str( self, *, seqid: str, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ) -> str: return self._arr2str(self.get_seq_arr(seqid=seqid, start=start, stop=stop)) @@ -879,8 +879,8 @@ def get_seq_arr( self, *, seqid: str, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, ) -> NDArray[numpy.uint8]: if not self._is_open: raise OSError(f"{self.source.name!r} is closed") @@ -937,8 +937,8 @@ def get_seq( self, *, seqid: str, - start: Optional[int] = None, - stop: Optional[int] = None, + start: int | None = None, + stop: int | None = None, namer: typing.Callable | None = None, with_annotations: bool = True, ) -> str: @@ -1055,7 +1055,7 @@ def get_seqs_for_ids( config: elt_config.InstalledConfig, species: str, names: list[str], - make_seq_name: typing.Optional[typing.Callable] = None, + make_seq_name: typing.Callable | None = None, ) -> typing.Iterable[Sequence]: genome = load_genome(config=config, species=species) # is it possible to do batch query for all names? @@ -1092,7 +1092,7 @@ def load_annotations_for_species(*, path: pathlib.Path) -> EnsemblGffDb: def _get_all_gene_segments( *, annot_db: EnsemblGffDb, - limit: Optional[int], + limit: int | None, ) -> list[dict]: result = [] for i, record in enumerate(annot_db.get_records_matching(biotype="gene")): @@ -1105,7 +1105,7 @@ def _get_all_gene_segments( def _get_selected_gene_segments( *, annot_db: EnsemblGffDb, - limit: Optional[int], + limit: int | None, stableids: list[str], ) -> list[dict]: result = [] @@ -1120,9 +1120,9 @@ def _get_selected_gene_segments( def get_gene_segments( *, annot_db: EnsemblGffDb, - limit: Optional[int] = None, - species: Optional[str] = None, - stableids: Optional[list[str]] = None, + limit: int | None = None, + species: str | None = None, + stableids: list[str] | None = None, ) -> list[genome_segment]: """return genome segment information for genes @@ -1159,8 +1159,8 @@ def get_gene_segments( def get_gene_table_for_species( *, annot_db: EnsemblGffDb, - limit: Optional[int], - species: Optional[str] = None, + limit: int | None, + species: str | None = None, ) -> Table: """ returns gene data from a GffDb @@ -1202,7 +1202,7 @@ def get_gene_table_for_species( def get_species_summary( *, annot_db: EnsemblGffDb, - species: Optional[str] = None, + species: str | None = None, ) -> Table: """ returns the Table summarising data for species_name diff --git a/src/ensembl_lite/_homology.py b/src/ensembl_tui/_homology.py similarity index 98% rename from src/ensembl_lite/_homology.py rename to src/ensembl_tui/_homology.py index 98ccf59..525801b 100644 --- a/src/ensembl_lite/_homology.py +++ b/src/ensembl_tui/_homology.py @@ -13,9 +13,9 @@ from cogent3.parse.table import FilteringParser from cogent3.util.io import PathType, iter_splitlines -from ensembl_lite import _config as elt_config -from ensembl_lite import _genome as elt_genome -from ensembl_lite import _storage_mixin as elt_mixin +from ensembl_tui import _config as elt_config +from ensembl_tui import _genome as elt_genome +from ensembl_tui import _storage_mixin as elt_mixin HOMOLOGY_STORE_NAME = "homologies.homology-sqlitedb" @@ -31,7 +31,7 @@ class species_genes: """contains gene IDs for species""" species: str - gene_ids: typing.Optional[list[str]] = None + gene_ids: list[str] | None = None def __hash__(self): return hash(self.species) @@ -57,7 +57,7 @@ class homolog_group: relationship: str # gene id -> species - gene_ids: typing.Optional[dict[str, str]] = None + gene_ids: dict[str, str] | None = None source: str | None = None def __post_init__(self): @@ -490,7 +490,7 @@ class collect_seqs: def __init__( self, config: elt_config.InstalledConfig, - make_seq_name: typing.Optional[typing.Callable] = None, + make_seq_name: typing.Callable | None = None, verbose: bool = False, ): self._config = config diff --git a/src/ensembl_lite/_install.py b/src/ensembl_tui/_install.py similarity index 93% rename from src/ensembl_lite/_install.py rename to src/ensembl_tui/_install.py index bea62d6..57ed6e9 100644 --- a/src/ensembl_lite/_install.py +++ b/src/ensembl_tui/_install.py @@ -1,17 +1,16 @@ from __future__ import annotations import shutil -import typing from rich.progress import Progress -from ensembl_lite import _align as elt_align -from ensembl_lite import _config as elt_config -from ensembl_lite import _genome as elt_genome -from ensembl_lite import _homology as elt_homology -from ensembl_lite import _maf as elt_maf -from ensembl_lite import _species as elt_species -from ensembl_lite import _util as elt_util +from ensembl_tui import _align as elt_align +from ensembl_tui import _config as elt_config +from ensembl_tui import _genome as elt_genome +from ensembl_tui import _homology as elt_homology +from ensembl_tui import _maf as elt_maf +from ensembl_tui import _species as elt_species +from ensembl_tui import _util as elt_util def _make_src_dest_annotation_paths( @@ -29,7 +28,7 @@ def local_install_genomes( force_overwrite: bool, max_workers: int | None, verbose: bool = False, - progress: typing.Optional[Progress] = None, + progress: Progress | None = None, ): if force_overwrite: shutil.rmtree(config.install_genomes, ignore_errors=True) @@ -107,7 +106,7 @@ def local_install_alignments( force_overwrite: bool, max_workers: int | None, verbose: bool = False, - progress: typing.Optional[Progress] = None, + progress: Progress | None = None, ): if force_overwrite: shutil.rmtree(config.install_aligns, ignore_errors=True) @@ -164,7 +163,7 @@ def local_install_homology( force_overwrite: bool, max_workers: int | None, verbose: bool = False, - progress: typing.Optional[Progress] = None, + progress: Progress | None = None, ): if force_overwrite: shutil.rmtree(config.install_homologies, ignore_errors=True) diff --git a/src/ensembl_lite/_maf.py b/src/ensembl_tui/_maf.py similarity index 96% rename from src/ensembl_lite/_maf.py rename to src/ensembl_tui/_maf.py index cfef268..c3c77b6 100644 --- a/src/ensembl_lite/_maf.py +++ b/src/ensembl_tui/_maf.py @@ -9,9 +9,9 @@ from cogent3.app.composable import LOADER, define_app from cogent3.app.typing import IdentifierType -from ensembl_lite import _align as elt_align -from ensembl_lite import _name as elt_name -from ensembl_lite import _util as elt_util +from ensembl_tui import _align as elt_align +from ensembl_tui import _name as elt_name +from ensembl_tui import _util as elt_util _id_pattern = re.compile(r"(?<=id[:])\s*\d+") diff --git a/src/ensembl_lite/_name.py b/src/ensembl_tui/_name.py similarity index 98% rename from src/ensembl_lite/_name.py rename to src/ensembl_tui/_name.py index 4c4ff2c..8cfcca9 100644 --- a/src/ensembl_lite/_name.py +++ b/src/ensembl_tui/_name.py @@ -1,7 +1,6 @@ from __future__ import annotations import re -import typing from dataclasses import dataclass from ._species import Species @@ -137,7 +136,7 @@ class MafName: start: int stop: int strand: str - coord_length: typing.Optional[str | int] + coord_length: str | int | None def __post_init__(self): # adjust the lengths to be ints diff --git a/src/ensembl_lite/_site_map.py b/src/ensembl_tui/_site_map.py similarity index 100% rename from src/ensembl_lite/_site_map.py rename to src/ensembl_tui/_site_map.py diff --git a/src/ensembl_lite/_species.py b/src/ensembl_tui/_species.py similarity index 98% rename from src/ensembl_lite/_species.py rename to src/ensembl_tui/_species.py index dd301b5..4a09033 100644 --- a/src/ensembl_lite/_species.py +++ b/src/ensembl_tui/_species.py @@ -6,7 +6,7 @@ from cogent3.core.tree import TreeNode from cogent3.util.table import Table -from ensembl_lite import _util as elt_util +from ensembl_tui import _util as elt_util SPECIES_NAME = "species.tsv" StrOrNone = typing.Union[str, type(None)] @@ -79,7 +79,7 @@ def get_common_name(self, name: str, level="raise") -> StrOrNone: msg = f"Unknown species name: {name}" if level == "raise": raise ValueError(msg) - elif level == "warn": + if level == "warn": print(f"WARN: {msg}") return common_name @@ -99,7 +99,7 @@ def get_species_name(self, name: str, level="ignore") -> StrOrNone: msg = f"Unknown common name: {name}" if level == "raise": raise ValueError(msg) - elif level == "warn": + if level == "warn": print(f"WARN: {msg}") return species_name diff --git a/src/ensembl_lite/_storage_mixin.py b/src/ensembl_tui/_storage_mixin.py similarity index 99% rename from src/ensembl_lite/_storage_mixin.py rename to src/ensembl_tui/_storage_mixin.py index ddbeb97..5d67439 100644 --- a/src/ensembl_lite/_storage_mixin.py +++ b/src/ensembl_tui/_storage_mixin.py @@ -7,7 +7,7 @@ import numpy -from ensembl_lite import _util as elt_util +from ensembl_tui import _util as elt_util ReturnType = tuple[str, tuple] # the sql statement and corresponding values diff --git a/src/ensembl_lite/_util.py b/src/ensembl_tui/_util.py similarity index 97% rename from src/ensembl_lite/_util.py rename to src/ensembl_tui/_util.py index e8a1870..82b688e 100644 --- a/src/ensembl_lite/_util.py +++ b/src/ensembl_tui/_util.py @@ -9,9 +9,10 @@ import sys import typing import uuid +from collections.abc import Callable from hashlib import md5 from tempfile import mkdtemp -from typing import IO, Callable, Union +from typing import IO, Union import blosc2 import hdf5plugin @@ -59,7 +60,7 @@ def _get_resource_dir() -> PathType: if "ENSEMBLDBRC" in os.environ: path = os.environ["ENSEMBLDBRC"] else: - from ensembl_lite import data + from ensembl_tui import data path = pathlib.Path(data.__file__).parent @@ -306,7 +307,7 @@ def rich_display(c3t, title_justify="left"): j = "right" if numeric_type else "left" rich_table.add_column(col, justify=j, no_wrap=numeric_type) - for row in zip(*columns): + for row in zip(*columns, strict=False): rich_table.add_row(*row) console = Console() @@ -323,7 +324,7 @@ def _name_parts(path: str) -> list[str]: def _simple_check(align_parts: str, tree_parts: str) -> int: """evaluates whether the start of the two paths match""" matches = 0 - for a, b in zip(align_parts, tree_parts): + for a, b in zip(align_parts, tree_parts, strict=False): if a != b: break matches += 1 @@ -413,13 +414,12 @@ def get_iterable_tasks( *, func: typing.Callable, series: typing.Sequence, - max_workers: typing.Optional[int], + max_workers: int | None, **kwargs, ) -> typing.Iterator: if max_workers == 1: return map(func, series) - else: - return as_completed(func, series, max_workers=max_workers, **kwargs) + return as_completed(func, series, max_workers=max_workers, **kwargs) # From http://mart.ensembl.org/info/genome/stable_ids/prefixes.html diff --git a/src/ensembl_lite/cli.py b/src/ensembl_tui/cli.py similarity index 97% rename from src/ensembl_lite/cli.py rename to src/ensembl_tui/cli.py index 1e834f4..89f0c15 100644 --- a/src/ensembl_lite/cli.py +++ b/src/ensembl_tui/cli.py @@ -8,16 +8,16 @@ try: from wakepy.keep import running as keep_running except ImportError: - from ensembl_lite._util import fake_wake as keep_running + from ensembl_tui._util import fake_wake as keep_running from trogon import tui -from ensembl_lite import __version__ -from ensembl_lite import _config as elt_config -from ensembl_lite import _download as elt_download -from ensembl_lite import _genome as elt_genome -from ensembl_lite import _species as elt_species -from ensembl_lite import _util as elt_util +from ensembl_tui import __version__ +from ensembl_tui import _config as elt_config +from ensembl_tui import _download as elt_download +from ensembl_tui import _genome as elt_genome +from ensembl_tui import _species as elt_species +from ensembl_tui import _util as elt_util try: # trap flaky behaviour on linux @@ -25,7 +25,7 @@ ... except NotImplementedError: - from ensembl_lite._util import fake_wake as keep_running + from ensembl_tui._util import fake_wake as keep_running def _get_installed_config_path(ctx, param, path) -> elt_util.PathType: @@ -259,7 +259,7 @@ def install(download, num_procs, force_overwrite, verbose): """create the local representations of the data""" from rich import progress - from ensembl_lite._install import ( + from ensembl_tui._install import ( local_install_alignments, local_install_genomes, local_install_homology, @@ -399,7 +399,7 @@ def alignments( from cogent3 import load_table from rich import progress - from ensembl_lite import _align as elt_align + from ensembl_tui import _align as elt_align # TODO support genomic coordinates, e.g. coord_name:start-stop, for # a reference species @@ -525,7 +525,7 @@ def homologs( """exports CDS sequence data in fasta format for homology type relationship""" from rich import progress - from ensembl_lite import _homology as elt_homology + from ensembl_tui import _homology as elt_homology LOGGER = CachingLogger() LOGGER.log_args() diff --git a/src/ensembl_lite/data/__init__.py b/src/ensembl_tui/data/__init__.py similarity index 100% rename from src/ensembl_lite/data/__init__.py rename to src/ensembl_tui/data/__init__.py diff --git a/src/ensembl_lite/data/sample.cfg b/src/ensembl_tui/data/sample.cfg similarity index 100% rename from src/ensembl_lite/data/sample.cfg rename to src/ensembl_tui/data/sample.cfg diff --git a/src/ensembl_lite/data/species.tsv b/src/ensembl_tui/data/species.tsv similarity index 100% rename from src/ensembl_lite/data/species.tsv rename to src/ensembl_tui/data/species.tsv diff --git a/tests/conftest.py b/tests/conftest.py index afd0fc1..e4a2590 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,8 @@ from configparser import ConfigParser import pytest -from ensembl_lite._util import get_resource_path + +from ensembl_tui._util import get_resource_path @pytest.fixture(scope="session") @@ -36,6 +37,6 @@ def name_as_seqid(species, seqid, start, end): return seqid -@pytest.fixture() +@pytest.fixture def namer(): return name_as_seqid diff --git a/tests/test_align.py b/tests/test_align.py index d40bdab..0c7fdd7 100644 --- a/tests/test_align.py +++ b/tests/test_align.py @@ -1,7 +1,8 @@ import numpy import pytest -from ensembl_lite import _align as elt_align -from ensembl_lite import _genome as elt_genome + +from ensembl_tui import _align as elt_align +from ensembl_tui import _genome as elt_genome def small_seqs(): @@ -65,7 +66,7 @@ def make_records(start, end, block_id): return records -@pytest.fixture() +@pytest.fixture def small_records(): records = make_records(1, 5, 0) return records @@ -110,7 +111,7 @@ def _get_expected_seqindex(data: str, align_index: int) -> int: # fixture to make synthetic GenomeSeqsDb and alignment db # based on a given alignment -@pytest.fixture() +@pytest.fixture def genomedbs_aligndb(small_records): align_db = elt_align.AlignDb(source=":memory:") align_db.add_records(records=small_records) @@ -330,7 +331,9 @@ def test_select_alignment_minus_strand(start_end, namer): ), ) def test_get_alignment_features(coord): - kwargs = dict(zip(("ref_species", "seqid", "ref_start", "ref_end"), coord)) + kwargs = dict( + zip(("ref_species", "seqid", "ref_start", "ref_end"), coord, strict=False), + ) genomes, align_db = make_sample(two_aligns=False) got = list(elt_align.get_alignment(align_db=align_db, genomes=genomes, **kwargs))[0] assert len(got.annotation_db) == 1 @@ -346,7 +349,9 @@ def test_get_alignment_features(coord): ), ) def test_get_alignment_masked_features(coord): - kwargs = dict(zip(("ref_species", "seqid", "ref_start", "ref_end"), coord)) + kwargs = dict( + zip(("ref_species", "seqid", "ref_start", "ref_end"), coord, strict=False), + ) kwargs["mask_features"] = ["gene"] genomes, align_db = make_sample(two_aligns=False) got = list(elt_align.get_alignment(align_db=align_db, genomes=genomes, **kwargs))[0] @@ -363,7 +368,7 @@ def test_get_alignment_masked_features(coord): ), ) def test_align_db_get_records(coord): - kwargs = dict(zip(("species", "seqid", "start", "stop"), coord)) + kwargs = dict(zip(("species", "seqid", "start", "stop"), coord, strict=False)) # records are, we should get a single hit from each query # [('blah', 0, 'human', 's1', 1, 12, '+', array([], dtype=int32)), _, align_db = make_sample(two_aligns=True) @@ -380,7 +385,7 @@ def test_align_db_get_records(coord): ), ) def test_align_db_get_records_required_only(coord): - kwargs = dict(zip(("species", "seqid"), coord)) + kwargs = dict(zip(("species", "seqid"), coord, strict=False)) # two hits for each species _, align_db = make_sample(two_aligns=True) got = list(align_db.get_records_matching(**kwargs)) @@ -396,7 +401,7 @@ def test_align_db_get_records_required_only(coord): ), ) def test_align_db_get_records_no_matches(coord): - kwargs = dict(zip(("species", "seqid"), coord)) + kwargs = dict(zip(("species", "seqid"), coord, strict=False)) # no hits at all _, align_db = make_sample() got = list(align_db.get_records_matching(**kwargs)) @@ -469,7 +474,7 @@ def test_gapstore_add_invalid_duplicate(): gap_store.add_record(index=20, gaps=a[:1]) -@pytest.fixture() +@pytest.fixture def small_db(small_records): import copy diff --git a/tests/test_cli.py b/tests/test_cli.py index 6a1e024..a7143b1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,14 +4,15 @@ import pytest from click.testing import CliRunner -from ensembl_lite import cli as elt_cli + +from ensembl_tui import cli as elt_cli if sys.platform.startswith("linux"): pytest.skip("skipping cli on linux due to wakepy", allow_module_level=True) -@pytest.mark.slow() -@pytest.mark.internet() +@pytest.mark.slow +@pytest.mark.internet def test_download(tmp_config): """runs download, install, drop according to a special test cfg""" tmp_dir = tmp_config.parent diff --git a/tests/test_config.py b/tests/test_config.py index 923cd3d..808362e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,8 +1,9 @@ import pathlib import pytest -from ensembl_lite import _align as elt_align -from ensembl_lite import _config as elt_config + +from ensembl_tui import _align as elt_align +from ensembl_tui import _config as elt_config def test_installed_genome(): @@ -36,7 +37,7 @@ def test_installed_config_hash(): assert len(v) == 1 -@pytest.fixture() +@pytest.fixture def installed_aligns(tmp_path): align_dir = tmp_path / elt_config._COMPARA_NAME / elt_config._ALIGNS_NAME align_dir.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_dbs.py b/tests/test_dbs.py index 587191f..bf5a10c 100644 --- a/tests/test_dbs.py +++ b/tests/test_dbs.py @@ -3,10 +3,11 @@ import numpy import pytest from cogent3 import load_table -from ensembl_lite import _align as elt_align -from ensembl_lite import _homology as elt_homology -from ensembl_lite import _maf as elt_maf -from ensembl_lite import _storage_mixin as elt_mixin + +from ensembl_tui import _align as elt_align +from ensembl_tui import _homology as elt_homology +from ensembl_tui import _maf as elt_maf +from ensembl_tui import _storage_mixin as elt_mixin @pytest.fixture(scope="function") @@ -59,7 +60,7 @@ def test_db_align_repr(db_align, func): func(db_align) -@pytest.fixture() +@pytest.fixture def hom_dir(DATA_DIR, tmp_path): path = DATA_DIR / "small_protein_homologies.tsv.gz" table = load_table(path) diff --git a/tests/test_emf.py b/tests/test_emf.py index 7dd5cbe..9f25823 100644 --- a/tests/test_emf.py +++ b/tests/test_emf.py @@ -1,6 +1,7 @@ import pytest -from ensembl_lite import _emf as elt_emf -from ensembl_lite import _name as elt_name + +from ensembl_tui import _emf as elt_emf +from ensembl_tui import _name as elt_name def test_load(DATA_DIR): diff --git a/tests/test_genome.py b/tests/test_genome.py index 5eceb14..046a160 100644 --- a/tests/test_genome.py +++ b/tests/test_genome.py @@ -3,10 +3,11 @@ import numpy import pytest from cogent3 import make_unaligned_seqs -from ensembl_lite import _genome as elt_genome -from ensembl_lite import _storage_mixin as elt_mixin from numpy.testing import assert_allclose +from ensembl_tui import _genome as elt_genome +from ensembl_tui import _storage_mixin as elt_mixin + @pytest.fixture(scope="function") def small_data(): @@ -83,7 +84,7 @@ def h5_genome(tmp_path): ) -@pytest.fixture() +@pytest.fixture def small_h5_genome(small_data, h5_genome): # in memory db h5_genome.add_records(records=small_data.items()) @@ -436,7 +437,7 @@ def test_gff_record_to_record_selected_fields(exclude_null): assert got == expect -@pytest.fixture() +@pytest.fixture def ensembl_gff_records(DATA_DIR): records, _ = elt_genome.custom_gff_parser( DATA_DIR / "c_elegans_WS199_shortened.gff3", @@ -445,12 +446,12 @@ def ensembl_gff_records(DATA_DIR): return records -@pytest.fixture() +@pytest.fixture def non_canonical_related(ensembl_gff_records): return elt_genome.make_gene_relationships(ensembl_gff_records.values()) -@pytest.fixture() +@pytest.fixture def canonical_related(ensembl_gff_records): transcript = ensembl_gff_records["transcript:B0019.1"] transcript.attrs = f"Ensembl_canonical;{transcript.attrs}" @@ -577,7 +578,8 @@ def fasta_data(DATA_DIR, tmp_path, request): def test_faster_fasta(fasta_data): from cogent3.parse.fasta import MinimalFastaParser - from ensembl_lite._faster_fasta import bytes_to_array, quicka_parser + + from ensembl_tui._faster_fasta import bytes_to_array, quicka_parser expect = { n: bytes_to_array(s.encode("utf8")) for n, s in MinimalFastaParser(fasta_data) diff --git a/tests/test_homology.py b/tests/test_homology.py index fbf924c..321ced8 100644 --- a/tests/test_homology.py +++ b/tests/test_homology.py @@ -1,6 +1,7 @@ import pytest from cogent3 import load_table -from ensembl_lite import _homology as elt_homology + +from ensembl_tui import _homology as elt_homology def _make_expected_o2o(table): @@ -16,7 +17,7 @@ def _make_expected_o2o(table): return result -@pytest.fixture() +@pytest.fixture def o2o_db(DATA_DIR, tmp_dir): raw = DATA_DIR / "one2one_homologies.tsv" @@ -62,7 +63,7 @@ def test_hdb(o2o_db, gene_id): assert got.gene_ids.keys() == expect[gene_id] -@pytest.fixture() +@pytest.fixture def orth_records(): return [ ("ortholog_one2one", {"1": "sp1", "2": "sp2"}), # grp 1 @@ -71,7 +72,7 @@ def orth_records(): ] -@pytest.fixture() +@pytest.fixture def hom_records(orth_records): return orth_records + [("ortholog_one2many", {"6": "sp2", "7": "sp3"})] @@ -82,7 +83,7 @@ def test_hdb_get_related_groups(o2o_db): assert len(got) == 5 -@pytest.fixture() +@pytest.fixture def hom_hdb(hom_records): groups = elt_homology.grouped_related(hom_records) hdb = elt_homology.HomologyDb(source=":memory:") diff --git a/tests/test_installed.py b/tests/test_installed.py index e2b3755..8d3b346 100644 --- a/tests/test_installed.py +++ b/tests/test_installed.py @@ -1,11 +1,12 @@ # this will be used to test integrated features import pytest from cogent3 import load_seq -from ensembl_lite import _config as elt_config -from ensembl_lite import _genome as elt_genome +from ensembl_tui import _config as elt_config +from ensembl_tui import _genome as elt_genome -@pytest.fixture() + +@pytest.fixture def one_genome(DATA_DIR, tmp_dir): cfg = elt_config.InstalledConfig(release="110", install_path=tmp_dir) # we're only making a genomes directory diff --git a/tests/test_maf.py b/tests/test_maf.py index 5c69806..d960f91 100644 --- a/tests/test_maf.py +++ b/tests/test_maf.py @@ -1,5 +1,6 @@ import pytest -from ensembl_lite import _maf as elt_maf + +from ensembl_tui import _maf as elt_maf def test_read(DATA_DIR): diff --git a/tests/test_name.py b/tests/test_name.py index b57dc89..19ef0f3 100644 --- a/tests/test_name.py +++ b/tests/test_name.py @@ -1,5 +1,6 @@ import pytest -from ensembl_lite._name import EnsemblDbName + +from ensembl_tui._name import EnsemblDbName def test_cmp_name(): diff --git a/tests/test_site_map.py b/tests/test_site_map.py index 0586684..535a699 100644 --- a/tests/test_site_map.py +++ b/tests/test_site_map.py @@ -1,5 +1,6 @@ import pytest -from ensembl_lite._site_map import get_site_map + +from ensembl_tui._site_map import get_site_map @pytest.mark.parametrize("site", ("ftp.ensembl.org",)) diff --git a/tests/test_species.py b/tests/test_species.py index 3e18078..8d8bfda 100644 --- a/tests/test_species.py +++ b/tests/test_species.py @@ -2,7 +2,8 @@ import pytest from cogent3.util.table import Table -from ensembl_lite._species import Species + +from ensembl_tui._species import Species class TestSpeciesNamemaps(TestCase): diff --git a/tests/test_util.py b/tests/test_util.py index 365c8ce..7a289fa 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -2,8 +2,9 @@ from random import shuffle import pytest -from ensembl_lite import _config as elt_config -from ensembl_lite import _util as elt_util + +from ensembl_tui import _config as elt_config +from ensembl_tui import _util as elt_util @pytest.fixture(scope="function") @@ -70,7 +71,7 @@ def test_parse_config_gorilla(gorilla_cfg): ), ) def test_invalid_seq(name): - from ensembl_lite._download import valid_seq_file + from ensembl_tui._download import valid_seq_file assert not valid_seq_file(name) @@ -86,7 +87,7 @@ def test_invalid_seq(name): ), ) def test_valid_seq(name): - from ensembl_lite._download import valid_seq_file + from ensembl_tui._download import valid_seq_file assert valid_seq_file(name) @@ -106,7 +107,7 @@ def just_compara_cfg(tmp_config): return tmp_config -@pytest.mark.internet() +@pytest.mark.internet def test_just_compara(just_compara_cfg): # get species names from the alignment ref tree cfg = elt_config.read_config(just_compara_cfg) @@ -137,7 +138,7 @@ def test_match_align_tree(tmp_config): "pub/release-110/maf/ensembl-compara/multiple_alignments/65_amniotes.pecan", ] - expect = dict(zip(aligns, trees)) + expect = dict(zip(aligns, trees, strict=False)) shuffle(aligns) result = elt_util.trees_for_aligns(aligns, trees) assert result == expect @@ -173,7 +174,7 @@ def test_config_update_species(tmp_config): assert set(config.db_names) == {"homo_sapiens", "saccharomyces_cerevisiae"} -@pytest.mark.internet() +@pytest.mark.internet def test_cfg_to_dict(just_compara_cfg): cfg = elt_config.read_config(just_compara_cfg) data = cfg.to_dict() From c8ffdf15424bd6d749cb77f6a2855a9d6c599398 Mon Sep 17 00:00:00 2001 From: Gavin Huttley Date: Tue, 3 Sep 2024 08:32:40 +1000 Subject: [PATCH 2/2] DEV: update docs and toml to renamed github repo --- README.md | 30 +++++++++++++++++------------- pyproject.toml | 6 +++--- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index e6f4f0d..6dadd5f 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,26 @@ -[![CI](https://github.com/cogent3/EnsemblLite/actions/workflows/testing_develop.yml/badge.svg)](https://github.com/cogent3/EnsemblLite/actions/workflows/testing_develop.yml) -[![CodeQL](https://github.com/cogent3/EnsemblLite/actions/workflows/codeql.yml/badge.svg)](https://github.com/cogent3/EnsemblLite/actions/workflows/codeql.yml) -[![Coverage Status](https://coveralls.io/repos/github/cogent3/EnsemblLite/badge.svg?branch=develop)](https://coveralls.io/github/cogent3/EnsemblLite?branch=develop) +[![CI](https://github.com/cogent3/ensembl_tui/actions/workflows/testing_develop.yml/badge.svg)](https://github.com/cogent3/ensembl_tui/actions/workflows/testing_develop.yml) +[![CodeQL](https://github.com/cogent3/ensembl_tui/actions/workflows/codeql.yml/badge.svg)](https://github.com/cogent3/ensembl_tui/actions/workflows/codeql.yml) +[![Coverage Status](https://coveralls.io/repos/github/cogent3/ensembl_tui/badge.svg?branch=develop)](https://coveralls.io/github/cogent3/ensembl_tui?branch=develop) +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) # ensembl-tui -ensembl-tui provides the `eti` command line application for obtaining a subset of the data provided by Ensembl which can then be queried locally. You can have multiple such subsets on your machine, each corresponding to a different selection of species and data types. +ensembl-tui provides the `eti` terminal application for obtaining a subset of the data provided by Ensembl which can then be queried locally. You can have multiple such subsets on your machine, each corresponding to a different selection of species and data types. > **Warning** -> ensembl-tui is in a preliminary phase of development with a limited feature set and incomplete test coverage! Please validate results against the web version. If you discover errors, please post a [bug report](https://github.com/cogent3/EnsemblLite/issues). +> ensembl-tui is in a preliminary phase of development with a limited feature set and incomplete test coverage! Please validate results against the web version. If you discover errors, please post a [bug report](https://github.com/cogent3/ensembl_tui/issues). ## Installing the software +
+ General user installation instructions + + ``` + $ pip install ensembl-tui + ``` + +
+
Developer installation instructions Fork the repo and clone your fork to your local machine. In the terminal, create either a python virtual environment or a new conda environment and activate it. In that virtual environment @@ -26,17 +36,11 @@ ensembl-tui provides the `eti` command line application for obtaining a subset o ```
-
- General user installation instructions - - We have not yet released on pypi. We will provide instructions here for a Docker based installation soon! -
- ## Resources required to subset Ensembl data -Ensembl hosts some very large data sets. You need to have a machine with sufficient disk space to store the data you want to download. At present we do not have support for predicting how much storage would be required for a given selection of species and data types. We advise you to experiment. +Ensembl hosts some very large data sets. You need to have a machine with sufficient disk space to store the data you want to download. At present we do not have support for predicting how much storage would be required for a given selection of species and data types. You will need to experiment. -Some commands can be run in parallel but have moderate memory requirements. If you have a machine with limited RAM, you may need to reduce the number of parallel processes. Again, we advise you to experiment. +Some commands can be run in parallel but have moderate memory requirements. If you have a machine with limited RAM, you may need to reduce the number of parallel processes. Again, run some experiments. ## Getting setup diff --git a/pyproject.toml b/pyproject.toml index bd8df0d..109f4bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,9 +40,9 @@ classifiers = [ dynamic = ["version", "description"] [project.urls] -Documentation = "https://github.com/cogent3/EnsemblLite" -"Bug Tracker" = "https://github.com/cogent3/EnsemblLite/issues" -"Source Code" = "https://github.com/cogent3/EnsemblLite" +Documentation = "https://github.com/cogent3/ensembl_tui" +"Bug Tracker" = "https://github.com/cogent3/ensembl_tui/issues" +"Source Code" = "https://github.com/cogent3/ensembl_tui" [project.scripts] eti = "ensembl_tui.cli:main"