From 75741bffe89b92cde9b84f3f3c2426dc3e0e3954 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Wed, 29 May 2024 13:05:15 -0700 Subject: [PATCH] Wrapper to upload a reference and fix bugs in search (#6) --- src/celldex/__init__.py | 1 + src/celldex/search_references.py | 12 ++--- src/celldex/upload_reference.py | 88 ++++++++++++++++++++++++++++++++ tests/test_search_refs.py | 6 +-- 4 files changed, 95 insertions(+), 12 deletions(-) create mode 100644 src/celldex/upload_reference.py diff --git a/src/celldex/__init__.py b/src/celldex/__init__.py index 2ac565f..8cf719e 100644 --- a/src/celldex/__init__.py +++ b/src/celldex/__init__.py @@ -20,3 +20,4 @@ from .list_versions import fetch_latest_version, list_versions from .save_reference import save_reference from .search_references import search_references +from .upload_reference import upload_reference diff --git a/src/celldex/search_references.py b/src/celldex/search_references.py index c87634b..5e4ca90 100644 --- a/src/celldex/search_references.py +++ b/src/celldex/search_references.py @@ -48,18 +48,12 @@ def search_references( .. 
code-block:: python - res = search_references("brain") + res = search_references("human") - res = search_references(define_text_query("Neuro%", partial=True") + res = search_references(define_text_query("Immun%", partial=True)) res = search_references(define_text_query("10090", field="taxonomy_id") - res = search_references( - define_text_query("GRCm38", field="genome") & - (define_text_query("neuro%", partial=True) | - define_text_query("pancrea%", partial=True)) - ) - Args: query: The search query string or a @@ -97,7 +91,7 @@ def search_references( stmt += ", versions.latest AS latest" key_names.append("latest") - stmt += " FROM paths LEFT JOIN versions ON paths.vid = versions.vid WHERE versions.project = 'scRNAseq'" + stmt += " FROM paths LEFT JOIN versions ON paths.vid = versions.vid WHERE versions.project = 'celldex'" if latest: stmt += " AND versions.latest = 1" diff --git a/src/celldex/upload_reference.py b/src/celldex/upload_reference.py new file mode 100644 index 0000000..5fd676e --- /dev/null +++ b/src/celldex/upload_reference.py @@ -0,0 +1,88 @@ +from gypsum_client import cache_directory, rest_url, upload_directory + +__author__ = "Jayaram Kancherla" +__copyright__ = "Jayaram Kancherla" +__license__ = "MIT" + + +def upload_reference( + directory: str, + name: str, + version: str, + package: str = "celldex", + cache_dir: str = cache_directory(), + deduplicate: bool = True, + probation: bool = False, + url: str = rest_url(), + token: str = None, + concurrent: int = 1, + abort_failed: bool = True, +): + """Upload the reference dataset to the gypsum bucket. + + This is a wrapper around + :py:func:`~gypsum_client.upload_file_actions.upload_directory` + specific to the `celldex` package. + + See Also: + :py:func:`~gypsum_client.upload_file_actions.upload_directory`, + to upload a directory to the gypsum backend. + + Args: + name: + Reference dataset name. + + version: + Version name for the reference. 
+ + directory: + Path to a directory containing the ``files`` to be uploaded. + This directory is assumed to correspond to a version of an asset. + + cache_dir: + Path to the cache for saving files, e.g., in + :py:func:`~gypsum_client.save_operations.save_version`. + + Used to convert symbolic links to upload links, see + :py:func:`~gypsum_client.prepare_directory_for_upload.prepare_directory_upload`. + + deduplicate: + Whether the backend should attempt deduplication of ``files`` + in the immediately previous version. + Defaults to True. + + probation: + Whether to perform a probational upload. + Defaults to False. + + url: + URL of the gypsum REST API. + + token: + GitHub access token to authenticate to the gypsum REST API. + + concurrent: + Number of concurrent uploads. + Defaults to 1. + + abort_failed: + Whether to abort the upload on any failure. + + Setting this to `False` can be helpful for diagnosing upload problems. + + Returns: + `True` if successful, otherwise `False`. + """ + return upload_directory( + directory, + package, + name, + version, + cache_dir=cache_dir, + deduplicate=deduplicate, + probation=probation, + url=url, + token=token, + concurrent=concurrent, + abort_failed=abort_failed, + ) diff --git a/tests/test_search_refs.py b/tests/test_search_refs.py index 9eac96d..e824d07 100644 --- a/tests/test_search_refs.py +++ b/tests/test_search_refs.py @@ -8,11 +8,11 @@ def test_search_references(): - res = search_references("brain") - assert len(res) > 10 + res = search_references("human") + assert len(res) > 3 assert isinstance(res, pd.DataFrame) - res = search_references(define_text_query("Neuro%", partial=True)) + res = search_references(define_text_query("Immun%", partial=True)) assert isinstance(res, pd.DataFrame) assert len(res) > 0