Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…to main
  • Loading branch information
canergen committed Dec 12, 2024
2 parents 81ec545 + 97a291a commit 44d04f2
Show file tree
Hide file tree
Showing 33 changed files with 911 additions and 182 deletions.
4 changes: 4 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/config.local
/tmp
/cache
/new
5 changes: 5 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[core]
autostage = true
remote = gdrive_remote
['remote "gdrive_remote"']
url = gdrive://1MbX5Au5QRgagdXUaV9xvgLB9lOEVbfbQ
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ __pycache__/
/.vscode/

/node_modules/
/test/
13 changes: 12 additions & 1 deletion data/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,13 @@
/mouse_thymus_cite.h5mu
/test.txt
/mouse_thymus_cite_totalvi
/haniffa_covid_pbmc.h5mu
/neurips_bone_marrow_cite.h5mu
/heart_cell_atlas.h5mu
/bone_marrow_cite_totalvi
/heart_cell_atlas_scvi
/heart_cell_atlas.h5ad
/hlca_core.h5ad
/hlca_reference_scanvi
/test_data.h5ad
/test_scvi
/haniffa_covid_pbmc
6 changes: 6 additions & 0 deletions data/bone_marrow_cite_totalvi.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
outs:
- md5: 78ef07339eacb072b5adcbee125c9acc.dir
size: 19834273
nfiles: 1
hash: md5
path: bone_marrow_cite_totalvi
6 changes: 6 additions & 0 deletions data/haniffa_covid_pbmc.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
outs:
- md5: 9727ed6842ec135c97bdaf9fe9475a10.dir
size: 30431585
nfiles: 1
hash: md5
path: haniffa_covid_pbmc
5 changes: 5 additions & 0 deletions data/haniffa_covid_pbmc.h5mu.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: fcb4c382ba3695eba206c4714fca3d45
size: 9357617754
hash: md5
path: haniffa_covid_pbmc.h5mu
5 changes: 5 additions & 0 deletions data/heart_cell_atlas.h5ad.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: 767bf9739d979b6a808164528f70802b
size: 28528362
hash: md5
path: heart_cell_atlas.h5ad
5 changes: 5 additions & 0 deletions data/heart_cell_atlas.h5mu.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: 767bf9739d979b6a808164528f70802b
size: 28528362
hash: md5
path: heart_cell_atlas.h5mu
6 changes: 6 additions & 0 deletions data/heart_cell_atlas_scvi.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
outs:
- md5: 75670e08aee1039bc163245eb6fc2152.dir
size: 3371610
nfiles: 1
hash: md5
path: heart_cell_atlas_scvi
5 changes: 5 additions & 0 deletions data/hlca_core.h5ad.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: 923ab873d03cff49cfd53a9063e6c1ed
size: 1940424488
hash: md5
path: hlca_core.h5ad
6 changes: 6 additions & 0 deletions data/hlca_reference_scanvi.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
outs:
- md5: 5c77b351ae8e005c3180d45cf1281586.dir
size: 5825950
nfiles: 1
hash: md5
path: hlca_reference_scanvi
4 changes: 2 additions & 2 deletions data/mouse_thymus_cite.h5mu.dvc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
outs:
- md5: 0932a99a0e1571da676b2cc4881d189a
size: 6628168757
- md5: 27ae88708a0e2e42591f8665601ca915
size: 1807038906
hash: md5
path: mouse_thymus_cite.h5mu
6 changes: 6 additions & 0 deletions data/mouse_thymus_cite_totalvi.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
outs:
- md5: a2e5acb9b535c112698ea6cad43a6a95.dir
size: 19781665
nfiles: 1
hash: md5
path: mouse_thymus_cite_totalvi
5 changes: 5 additions & 0 deletions data/neurips_bone_marrow_cite.h5mu.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: 3f68254232dfe342a45a612684f36d92
size: 881794010
hash: md5
path: neurips_bone_marrow_cite.h5mu
5 changes: 0 additions & 5 deletions data/test.txt.dvc

This file was deleted.

5 changes: 5 additions & 0 deletions data/test_data.h5ad.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: 91bb92467e4f291e0ba0c1663dfabc96
size: 1008864
hash: md5
path: test_data.h5ad
6 changes: 6 additions & 0 deletions data/test_scvi.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
outs:
- md5: 0155f7292347d68142c70584a7607eae.dir
size: 309658
nfiles: 1
hash: md5
path: test_scvi
12 changes: 10 additions & 2 deletions src/scvi_hub_models/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,15 @@
@click.option("--dry_run", type=bool, default=False, help="Dry run the workflow.")
@click.option("--config_key", type=str, help="Use a different config file, e.g. for test purpose.")
@click.option("--save_dir", type=str, help="Directory to save intermediate results (defaults temporary).")
def run_workflow(model_name: str, dry_run: bool, config_key: str = None, save_dir: str = None) -> None:
@click.option("--reload_data", type=bool, help="Reload the data or get from DVC.")
@click.option("--reload_model", type=bool, help="Reload the model or get from DVC.")
def run_workflow(
model_name: str,
dry_run: bool,
config_key: str = None,
save_dir: str = None,
reload_data: bool = False,
reload_model: bool = False) -> None:
"""Run the workflow for a specific model."""
from importlib import import_module
if not config_key:
Expand All @@ -22,7 +30,7 @@ def run_workflow(model_name: str, dry_run: bool, config_key: str = None, save_di
Workflow = workflow_module._Workflow
config = json_data_store[config_key]

workflow = Workflow(save_dir=save_dir, dry_run=dry_run, config=config)
workflow = Workflow(save_dir=save_dir, dry_run=dry_run, config=config, reload_data=reload_data, reload_model=reload_model)
workflow.run()


Expand Down
5 changes: 4 additions & 1 deletion src/scvi_hub_models/config/haniffa_covid_pbmc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
"model_dir": "haniffa_covid_pbmc",
"model_class": "TOTALVI",
"repo_name": "scvi-tools/haniffa_covid_pbmc_totalvi",
"reload_data": true,
"extra_data_kwargs": {
"reference_adata_cxg_id": "c7775e88-49bf-4ba2-a03b-93f00447c958",
"reference_adata_fname": "haniffa_covid_pbmc.h5ad"
"reference_adata_fname": "haniffa_covid_pbmc.h5ad",
"large_training_file_name": "haniffa_covid_pbmc.h5mu"
},
"metadata": {
"training_data_url": "https://datasets.cellxgene.cziscience.com/5ad66a4f-d619-4cb3-8015-a87c755647b3.h5ad",
Expand All @@ -16,6 +18,7 @@
"description": "CITE-seq to measure RNA and surface proteins in thymocytes from wild-type and T cell lineage-restricted mice to generate a comprehensive timeline of cell state for each T cell lineage.",
"references": "Steier, Z., Aylard, D.A., McIntyre, L.L. et al. Single-cell multiomic analysis of thymocyte development reveals drivers of CD4+ T cell and CD8+ T cell lineage commitment. Nat Immunol 24, 1579–1590 (2023). https://doi.org/10.1038/s41590-023-01584-0."
},

"criticism_settings": {
"n_samples": 3,
"cell_type_key": "cell_type"
Expand Down
4 changes: 4 additions & 0 deletions src/scvi_hub_models/config/heart_cell_atlas.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
"model_dir": "heart_cell_atlas_scvi",
"model_class": "SCVI",
"repo_name": "scvi-tools/heart-cell-atlas-scvi",
"extra_data_kwargs": {
"reference_adata_fname": "heart_cell_atlas.h5ad",
"large_training_file_name": "heart_cell_atlas.h5ad"
},
"metadata": {
"training_data_url": "https://www.heartcellatlas.org/#DataSources",
"tissues": ["heart"],
Expand Down
5 changes: 3 additions & 2 deletions src/scvi_hub_models/config/human_lung_cell_atlas.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
{
"model_dir": "hlca_scanvi_reference",
"model_dir": "hlca_reference_scanvi",
"model_class": "SCANVI",
"repo_name": "scvi-tools/human-lung-cell-atlas-scanvi",
"extra_data_kwargs": {
"legacy_model_url": "https://zenodo.org/records/7599104/files/HLCA_reference_model.zip",
"legacy_model_hash": "a7cd60f4342292b3cba54545bcd8a34decdc8e6b82163f009273d543e7e3910e",
"legacy_model_dir": "hlca_scanvi_reference_legacy",
"reference_adata_cxg_id": "066943a2-fdac-4b29-b348-40cede398e4e",
"reference_adata_fname": "hlca_core.h5ad"
"reference_adata_fname": "hlca_core.h5ad",
"large_training_file_name": "hlca_core.h5ad"
},
"metadata": {
"training_data_url": "https://cellxgene.cziscience.com/collections/6f6d381a-7701-4781-935c-db10d30de293",
Expand Down
7 changes: 3 additions & 4 deletions src/scvi_hub_models/config/mouse_thymus_cite.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
{
"model_dir": "mouse_thymus_cite",
"model_dir": "mouse_thymus_cite_totalvi",
"model_class": "TOTALVI",
"repo_name": "scvi-tools/mouse_thymus_totalvi",
"reload_data": true,
"repo_name": "scvi-tools/mouse_thymus_cite_totalvi",
"extra_data_kwargs": {
"reference_adata_cxg_id": "c14c54f8-85d8-45db-9de7-6ab572cc748a",
"reference_adata_fname": "thymus_cite.h5ad",
"reference_adata_fname": "mouse_thymus_cite.h5ad",
"large_training_file_name": "mouse_thymus_cite.h5mu"
},
"metadata": {
Expand Down
7 changes: 5 additions & 2 deletions src/scvi_hub_models/config/neurips_cite.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
{
"model_dir": "bone_marrow_cite",
"model_dir": "bone_marrow_cite_totalvi",
"model_class": "TOTALVI",
"repo_name": "scvi-tools/bone_marrow_cite_totalvi",
"extra_data_kwargs": {
"reference_adata_fname": "bmmc_cite.h5ad"
"url": "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE194122&format=file&file=GSE194122%5Fopenproblems%5Fneurips2021%5Fcite%5FBMMC%5Fprocessed%2Eh5ad%2Egz",
"hash": "b9b50fade9349719cba23c97c6515d3501a32ee3735fe95fe51221d2e8a5f361",
"reference_adata_fname": "bmmc_cite.h5ad.gz",
"large_training_file_name": "neurips_bone_marrow_cite.h5mu"
},
"metadata": {
"training_data_url": "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE194122&format=file&file=GSE194122%5Fopenproblems%5Fneurips2021%5Fcite%5FBMMC%5Fprocessed%2Eh5ad%2Egz",
Expand Down
4 changes: 3 additions & 1 deletion src/scvi_hub_models/config/test_scvi.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
"model_dir": "test_scvi",
"model_class": "SCVI",
"repo_name": "scvi-tools/test-scvi",
"extra_data_kwargs": {
"large_training_file_name": "test_data.h5ad"
},
"collection_name": "test",
"minify_model": false,
"extra_data_kwargs": {},
"metadata": {
"tissues": ["synthetic"],
"data_modalities": ["rna"],
Expand Down
Loading

0 comments on commit 44d04f2

Please sign in to comment.