Skip to content

Commit

Permalink
Wrapper to upload a reference and fix bugs in search (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche authored May 29, 2024
1 parent 4da1496 commit 75741bf
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/celldex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@
from .list_versions import fetch_latest_version, list_versions
from .save_reference import save_reference
from .search_references import search_references
from .upload_reference import upload_reference
12 changes: 3 additions & 9 deletions src/celldex/search_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,12 @@ def search_references(
.. code-block:: python
res = search_references("brain")
res = search_references("human")
res = search_references(define_text_query("Neuro%", partial=True))
res = search_references(define_text_query("Immun%", partial=True))
res = search_references(define_text_query("10090", field="taxonomy_id"))
res = search_references(
define_text_query("GRCm38", field="genome") &
(define_text_query("neuro%", partial=True) |
define_text_query("pancrea%", partial=True))
)
Args:
query:
The search query string or a
Expand Down Expand Up @@ -97,7 +91,7 @@ def search_references(
stmt += ", versions.latest AS latest"
key_names.append("latest")

stmt += " FROM paths LEFT JOIN versions ON paths.vid = versions.vid WHERE versions.project = 'scRNAseq'"
stmt += " FROM paths LEFT JOIN versions ON paths.vid = versions.vid WHERE versions.project = 'celldex'"

if latest:
stmt += " AND versions.latest = 1"
Expand Down
88 changes: 88 additions & 0 deletions src/celldex/upload_reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from typing import Optional

from gypsum_client import cache_directory, rest_url, upload_directory

__author__ = "Jayaram Kancherla"
__copyright__ = "Jayaram Kancherla"
__license__ = "MIT"


def upload_reference(
    directory: str,
    name: str,
    version: str,
    package: str = "celldex",
    cache_dir: Optional[str] = None,
    deduplicate: bool = True,
    probation: bool = False,
    url: Optional[str] = None,
    token: Optional[str] = None,
    concurrent: int = 1,
    abort_failed: bool = True,
):
    """Upload the reference dataset to the gypsum bucket.

    This is a wrapper around
    :py:func:`~gypsum_client.upload_file_actions.upload_directory`
    specific to the `celldex` package.

    See Also:
        :py:func:`~gypsum_client.upload_file_actions.upload_directory`,
        to upload a directory to the gypsum backend.

    Args:
        directory:
            Path to a directory containing the ``files`` to be uploaded.
            This directory is assumed to correspond to a version of an asset.

        name:
            Reference dataset name.

        version:
            Version name for the reference.

        package:
            Package name forwarded to ``upload_directory``.
            Defaults to "celldex".

        cache_dir:
            Path to the cache for saving files, e.g., in
            :py:func:`~gypsum_client.save_operations.save_version`.

            Used to convert symbolic links to upload links, see
            :py:func:`~gypsum_client.prepare_directory_for_upload.prepare_directory_upload`.

            Defaults to None, in which case ``cache_directory()`` is
            called at the time of the upload.

        deduplicate:
            Whether the backend should attempt deduplication of ``files``
            in the immediately previous version.
            Defaults to True.

        probation:
            Whether to perform a probational upload.
            Defaults to False.

        url:
            URL of the gypsum REST API.
            Defaults to None, in which case ``rest_url()`` is called at
            the time of the upload.

        token:
            GitHub access token to authenticate to the gypsum REST API.
            Defaults to None, which is forwarded unchanged to
            ``upload_directory``.

        concurrent:
            Number of concurrent uploads.
            Defaults to 1.

        abort_failed:
            Whether to abort the upload on any failure.

            Setting this to `False` can be helpful for diagnosing upload problems.

    Returns:
        The value returned by ``upload_directory``:
        `True` if successful, otherwise `False`.
    """
    # Resolve the gypsum defaults per call rather than in the signature:
    # a default written as ``cache_directory()`` / ``rest_url()`` is evaluated
    # only once, when this module is imported, and would then ignore any
    # later change to the configured cache directory or REST URL.
    if cache_dir is None:
        cache_dir = cache_directory()

    if url is None:
        url = rest_url()

    return upload_directory(
        directory,
        package,
        name,
        version,
        cache_dir=cache_dir,
        deduplicate=deduplicate,
        probation=probation,
        url=url,
        token=token,
        concurrent=concurrent,
        abort_failed=abort_failed,
    )
6 changes: 3 additions & 3 deletions tests/test_search_refs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@


def test_search_references():
res = search_references("brain")
assert len(res) > 10
res = search_references("human")
assert len(res) > 3
assert isinstance(res, pd.DataFrame)

res = search_references(define_text_query("Neuro%", partial=True))
res = search_references(define_text_query("Immun%", partial=True))
assert isinstance(res, pd.DataFrame)
assert len(res) > 0

Expand Down

0 comments on commit 75741bf

Please sign in to comment.