Skip to content

Commit

Permalink
Initial model files
Browse files Browse the repository at this point in the history
  • Loading branch information
martinkim0 committed Feb 7, 2024
1 parent 169ccec commit 558e859
Show file tree
Hide file tree
Showing 16 changed files with 835 additions and 0 deletions.
Empty file added src/scvi_hub_models/__init__.py
Empty file.
27 changes: 27 additions & 0 deletions src/scvi_hub_models/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from scvi_hub_models.utils import wrap_kwargs


@wrap_kwargs
def run_workflow(
    model_name: str,
    dry_run: bool = False,
    repo_create: bool = False,
) -> None:
    """Run the workflow for a specific model.

    Parameters
    ----------
    model_name
        Name of the workflow to run. One of ``"heart_cell_atlas"``,
        ``"human_lung_cell_atlas"`` or ``"tabula_sapiens"``.
    dry_run
        Passed through unchanged to the selected ``model_workflow``.
    repo_create
        Passed through unchanged to the selected ``model_workflow``.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Each workflow is imported inside its own branch, so only the module that
    # belongs to the requested model is imported.
    if model_name == "heart_cell_atlas":
        from scvi_hub_models.models.heart_cell_atlas import model_workflow
    elif model_name == "human_lung_cell_atlas":
        from scvi_hub_models.models.human_lung_cell_atlas import model_workflow
    elif model_name == "tabula_sapiens":
        from scvi_hub_models.models.tabula_sapiens import model_workflow
    else:
        # Without this branch an unknown name left `model_workflow` unbound and
        # the call below failed with an UnboundLocalError that did not mention
        # the offending model name.
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of "
            "'heart_cell_atlas', 'human_lung_cell_atlas', 'tabula_sapiens'."
        )

    logger.info(
        f"Started running {model_name} workflow with `dry_run={dry_run}` and "
        f"`repo_create={repo_create}`."
    )
    model_workflow(dry_run=dry_run, repo_create=repo_create)


# Script entry point: only runs when this module is executed directly.
if __name__ == "__main__":
    # NOTE(review): `run_workflow` is called with no arguments although
    # `model_name` has no default; presumably the `wrap_kwargs` decorator
    # supplies the arguments (e.g. from the command line). Confirm in
    # `scvi_hub_models.utils`.
    run_workflow()
9 changes: 9 additions & 0 deletions src/scvi_hub_models/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from ._heart_cell_atlas import _CONFIG as HEART_CELL_ATLAS_CONFIG
from ._human_lung_cell_atlas import _CONFIG as HUMAN_LUNG_CELL_ATLAS_CONFIG
from ._tabula_sapiens import _CONFIG as TABULA_SAPIENS_CONFIG

# Public names of this package: the per-model configuration dictionaries
# imported above under their upper-case aliases.
__all__ = [
    "HEART_CELL_ATLAS_CONFIG",
    "HUMAN_LUNG_CELL_ATLAS_CONFIG",
    "TABULA_SAPIENS_CONFIG",
]
13 changes: 13 additions & 0 deletions src/scvi_hub_models/config/_heart_cell_atlas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Citation string stored under metadata["references"]. It is assembled from
# adjacent literals (authors / title / venue and DOI) purely for readability;
# the resulting value is a single string.
_REFERENCES = (
    "Kazumasa Kanemaru, James Cranley, Daniele Muraro, Antonio M. A. Miranda, Siew Yen Ho, Anna Wilbrey-Clark, Jan Patrick Pett, Krzysztof Polanski, Laura Richardson, Monika Litvinukova, Natsuhiko Kumasaka, Yue Qin, Zuzanna Jablonska, Claudia I. Semprich, Lukas Mach, Monika Dabrowska, Nathan Richoz, Liam Bolt, Lira Mamanova, Rakeshlal Kapuge, Sam N. Barnett, Shani Perera, Carlos Talavera-López, Ilaria Mulas, Krishnaa T. Mahbubani, Liz Tuck, Lu Wang, Margaret M. Huang, Martin Prete, Sophie Pritchard, John Dark, Kourosh Saeb-Parsy, Minal Patel, Menna R. Clatworthy, Norbert Hübner, Rasheda A. Chowdhury, Michela Noseda & Sarah A. Teichmann. "
    "Spatially resolved multiomics of human cardiac niches. "
    "Nature, July 2023. doi:10.1038/s41586-023-06311-1."
)

# Configuration for the heart cell atlas model.
# NOTE(review): key meanings are inferred from their names; the code that
# consumes this dictionary is not visible here.
_CONFIG = {
    "model_dir": "heart_cell_atlas_scvi",
    "repo_name": "scvi-tools/heart-cell-atlas-scvi",
    "metadata": {
        "training_data_url": "https://www.heartcellatlas.org/#DataSources",
        "tissues": ["heart"],
        "data_modalities": ["rna"],
        "data_is_annotated": True,
        "license_info": "cc-by-4.0",
        "description": "Combined single cell and single nuclei RNA-Seq data of 485K cardiac cells with annotations.",
        "references": _REFERENCES,
    },
}
23 changes: 23 additions & 0 deletions src/scvi_hub_models/config/_human_lung_cell_atlas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Configuration for the Human Lung Cell Atlas (HLCA) model.
# NOTE(review): key meanings are inferred from their names; the code that
# consumes this dictionary is not visible here. The `*_hash` values are 64 hex
# characters, presumably SHA-256 digests of the corresponding downloads;
# confirm against the download helper.
_CONFIG = {
    "legacy_model_url": "https://zenodo.org/records/7599104/files/HLCA_reference_model.zip",
    "legacy_model_hash": "a7cd60f4342292b3cba54545bcd8a34decdc8e6b82163f009273d543e7e3910e",
    "legacy_model_dir": "hlca_scanvi_reference_legacy",
    "model_dir": "hlca_scanvi_reference",
    "reference_adata_cxg_id": "066943a2-fdac-4b29-b348-40cede398e4e",
    "reference_adata_fname": "hlca_core.h5ad",
    "embedding_adata_url": "https://zenodo.org/records/7599104/files/HLCA_full_v1.1_emb.h5ad",
    "embedding_adata_hash": "3e2c4da281b6883464b2a70bcc1562d1c4246de32093e7b08090673fbad56a97",
    "embedding_adata_fname": "hlca_all_emb.h5ad",
    "mini_model_dir": "hlca_scanvi_reference_mini",
    "repo_name": "scvi-tools/human-lung-cell-atlas",
    "metadata": {
        "training_data_url": "https://cellxgene.cziscience.com/collections/6f6d381a-7701-4781-935c-db10d30de293",
        "training_code_url": "https://github.com/LungCellAtlas/HLCA_reproducibility",
        "tissues": ["nose", "respiratory airway", "lung parenchyma"],
        "data_modalities": ["rna"],
        "data_is_annotated": True,
        "license_info": "cc-by-4.0",
        "description": "The integrated Human Lung Cell Atlas (HLCA) represents the first large-scale, integrated single-cell reference atlas of the human lung.",
        # Author name corrected from "Sara A. Teichmann" to "Sarah A. Teichmann".
        "references": "Lisa Sikkema, Ciro Ramírez-Suástegui, Daniel C. Strobl, Tessa E. Gillett, Luke Zappia, Elo Madissoon, Nikolay S. Markov, Laure-Emmanuelle Zaragosi, Yuge Ji, Meshal Ansari, Marie-Jeanne Arguel, Leonie Apperloo, Martin Banchero, Christophe Bécavin, Marijn Berg, Evgeny Chichelnitskiy, Mei-i Chung, Antoine Collin, Aurore C. A. Gay, Janine Gote-Schniering, Baharak Hooshiar Kashani, Kemal Inecik, Manu Jain, Theodore S. Kapellos, Tessa M. Kole, Sylvie Leroy, Christoph H. Mayr, Amanda J. Oliver, Michael von Papen, Lance Peter, Chase J. Taylor, Thomas Walzthoeni, Chuan Xu, Linh T. Bui, Carlo De Donno, Leander Dony, Alen Faiz, Minzhe Guo, Austin J. Gutierrez, Lukas Heumos, Ni Huang, Ignacio L. Ibarra, Nathan D. Jackson, Preetish Kadur Lakshminarasimha Murthy, Mohammad Lotfollahi, Tracy Tabib, Carlos Talavera-López, Kyle J. Travaglini, Anna Wilbrey-Clark, Kaylee B. Worlock, Masahiro Yoshida, Lung Biological Network Consortium, Maarten van den Berge, Yohan Bossé, Tushar J. Desai, Oliver Eickelberg, Naftali Kaminski, Mark A. Krasnow, Robert Lafyatis, Marko Z. Nikolic, Joseph E. Powell, Jayaraj Rajagopal, Mauricio Rojas, Orit Rozenblatt-Rosen, Max A. Seibold, Dean Sheppard, Douglas P. Shepherd, Don D. Sin, Wim Timens, Alexander M. Tsankov, Jeffrey Whitsett, Yan Xu, Nicholas E. Banovich, Pascal Barbry, Thu Elizabeth Duong, Christine S. Falk, Kerstin B. Meyer, Jonathan A. Kropski, Dana Pe’er, Herbert B. Schiller, Purushothama Rao Tata, Joachim L. Schultze, Sarah A. Teichmann, Alexander V. Misharin, Martijn C. Nawijn, Malte D. Luecken, and Fabian J. Theis. An integrated cell atlas of the lung in health and disease. Nature Medicine, June 2023. doi:10.1038/s41591-023-02327-2.",
    },
}
88 changes: 88 additions & 0 deletions src/scvi_hub_models/config/_tabula_sapiens.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Tissue names; the two hash tables below use exactly these names as keys,
# in the same order.
_TISSUES = [
    "Bladder",
    "Blood",
    "Bone_Marrow",
    "Eye",
    "Fat",
    "Heart",
    "Large_Intestine",
    "Liver",
    "Lung",
    "Lymph_Node",
    "Mammary",
    "Muscle",
    "Pancreas",
    "Prostate",
    "Salivary_Gland",
    "Skin",
    "Small_Intestine",
    "Spleen",
    "Thymus",
    "Tongue",
    "Trachea",
    "Uterus",
    "Vasculature",
]

# Per-tissue hash table stored under "model_hashes".
# NOTE(review): values are 64 hex characters, presumably SHA-256 digests of the
# per-tissue model archives; confirm against the download helper.
_MODEL_HASHES = {
    "Bladder": "3cbdba7afe4f18e13e19228c38ffd57f0606f5524393e6c108ab15eba0da4042",
    "Blood": "a7dc61b6604842a157b2a16180eb53df805fe7f3a8096985c4775fb893a7d2a3",
    "Bone_Marrow": "e46f20f64f404dcbb2b5ae814061849c22e9b0b59d03a2dff65bbb6e6f792888",
    "Eye": "82578a5d0867c72a6415fc77c96231617ae1799d21e33cac96f40ab1293819e1",
    "Fat": "ee1befdd03313d65c6d5089b503277c347ea495df55b7282c7149eb11cc66df1",
    "Heart": "3cedbb01b451ce6b1668f057ebb7fed493d1bd49dec4cbda4f8d4363944dee47",
    "Large_Intestine": "ae78f8166d9aeddb9abb8fee64a9d72581db6bd002f009d3f0a69e3de2f3218e",
    "Liver": "c9b55e28d589a27b97a8205746d51388eecbe02b043862b89d4511c48ca78af2",
    "Lung": "8f5425470e624ce15db7bbc92cc9ede56e3893e8367812b4c991bb4dedc14f19",
    "Lymph_Node": "925a1fccf1d20e97cf84d1f5b78ea07b59bfe38a60920c6db4265158654b573c",
    "Mammary": "0ad9dd4e91c2b5d88f9a9c8804c379af47207aa0823a2bcb2c93c7bf9d5e9b18",
    "Muscle": "2710c00173f9dd247f89e595867151ab799370797f19c3dc9f7dabc4e9d68ab4",
    "Pancreas": "e2a9487863b6b218dde014f38b1e6477f041614726d5448fa23041c6d6d7916a",
    "Prostate": "34c4a8362cba4000ebaf0fec2af151f8f93eeed1f0001308a732190357a5f437",
    "Salivary_Gland": "d444cb54b99f1e13e920d2dba7ba2aece33d20917d19d87668c78c42ec482293",
    "Skin": "9103e61226183ac4ced7fc49be081538335be2eded0ee7b9ba9dea35a5a1acbe",
    "Small_Intestine": "b469b6d27bb25d2d04d8e55109a72fac08b71260969d22b3d1ccd306e6c34666",
    "Spleen": "7933f10778d237e15fffaae70ea872f29f56a40e227c6e1c4f8c4d6603c0211b",
    "Thymus": "d1238909376beb6ca1bf3e99a75023a5522d395f6887b0a710aad08ffc735517",
    "Tongue": "c2ad649e5d9856964fe2b0472fe00cb3a85b8c429bef88e17f7ae2a95e9e28cd",
    "Trachea": "b0525106022ff29c5777ed65e643f5089d44b846fde76fb1d3b7206a1d9799dd",
    "Uterus": "85a878008bf2c2d91613ed5086ec6c2a5e95628be31e5a049192916f7e327191",
    "Vasculature": "79e2a4a93a024ded96c8c294c328236ca8c83839c171d7adc49f95c781cf9503",
}

# Per-tissue hash table stored under "adata_hashes".
# NOTE(review): presumably digests of the per-tissue training-data files; confirm.
_ADATA_HASHES = {
    "Bladder": "2767fd7400b1a0b24f1eef7b29f942108707756644a514e1859c97e2c85cfd7e",
    "Blood": "786b052eaac01debdec526da18f8517e636698c1e08fa4015f1524ec861eb5e6",
    "Bone_Marrow": "ebdeb204ed2e25b67b3fcb0d088b91efa893e72de0b94e3edd5e5eae2d637ccc",
    "Eye": "6f9339aac973be7cec467dbc8b18d5cfaac45f495d14f6b1c0a814008331813e",
    "Fat": "428286a39c52793c421773cb0f5e1855d459eb718f570c798b583ab2a78e2ca3",
    "Heart": "57c269b0e7fbd33049bcf6f672f94390ccf47ce911d80cd7c05c8e655132ba09",
    "Large_Intestine": "9360da72338356be60c185c6affe1bbd0714a6bb7a007dcafc34648534c43f77",
    "Liver": "849386b699bd891f1ce9e40f6ba746ee3ac77c226b919a63a8388d0027937735",
    "Lung": "65b52ee9495612e78fa7d587e3f1e3533a8a9342bc4f0742fbeca82d3d652186",
    "Lymph_Node": "d1191951df41de146b52d407c2db95d02dbc09104c0b4be9d67603a28dc23d9b",
    "Mammary": "ccc4804016ce554a1e934cd72ebf9a7f4609228f036b24493df0bf2f4c9029e8",
    "Muscle": "89d802a4f6f8118813adcb4444e013f349012e32931b1771259d644b3a9015cb",
    "Pancreas": "f1408ea16b82da49764296d5c12901ea5e9474fc53920f2838d7d85e07cb7eea",
    "Prostate": "5cec0b4f30b649ab1c9455ce48ae99c0f4d424d7f03d3c71e5fa5072bfecb0f9",
    "Salivary_Gland": "649b0735b14f6d1a817427ade96307e69e07f1736584c867a305beccce927748",
    "Skin": "8a6ec2ef56963642a84cc94605789e16977b645cfd4824e89d5d395783b3f233",
    "Small_Intestine": "03536949c92d9f80097d8b8065384ca00658a5f3ac1a8a0b8cfffeeaabc8c845",
    "Spleen": "9acfb41142d1c28e1138fcefbcb8685094472dab63c0cb229691c0995447db8d",
    "Thymus": "ce51cda4944adbc5e3c506bfed811bd795d7df45849e5844e26101637883f910",
    "Tongue": "9a6d83487c0339b26dc0c2ad04c0b0eec6cfbd4e2ba08e5fe966843797c150be",
    "Trachea": "88d43f668b6701b9eb7756c2d504b0110bf11d7d7d3970c74018b72f82f18b0b",
    "Uterus": "ad6f10fd24203fa60beda94fa0175eebaf65b0f61df89b278532c531d19193fc",
    "Vasculature": "bf4ce2a88de4fdab1e2f07128dc25aa055064c853e767e1cc015ce89d49ddf4c",
}

# Configuration for the Tabula Sapiens models, assembled from the tables above.
# NOTE(review): `base_url` combined with a tissue name and one of the suffixes
# presumably forms a download URL; confirm against the workflow that uses it.
_CONFIG = {
    "tissues": _TISSUES,
    "model_hashes": _MODEL_HASHES,
    "adata_hashes": _ADATA_HASHES,
    "base_url": "https://zenodo.org/records/7608635/files/",
    "models_suffix": "_pretrained_models.tar.gz",
    "adata_suffix": "_training_data.h5ad",
    "base_repo_name": "scvi-tools/tabula-sapiens",
    "metadata": {
        "data_modalities": ["rna"],
        "data_is_annotated": True,
        "license_info": "cc-by-4.0",
        "description": "Tabula Sapiens is a benchmark, first-draft human cell atlas of nearly 500,000 cells from 24 organs of 15 normal human subjects.",
        "references": "The Tabula Sapiens Consortium. The Tabula Sapiens: A multiple-organ, single-cell transcriptomic atlas of humans. Science, May 2022. doi:10.1126/science.abl4896",
    },
}
13 changes: 13 additions & 0 deletions src/scvi_hub_models/config/heart_cell_atlas.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"model_dir": "heart_cell_atlas_scvi",
"repo_name": "scvi-tools/heart-cell-atlas-scvi",
"metadata": {
"training_data_url": "https://www.heartcellatlas.org/#DataSources",
"tissues": ["heart"],
"data_modalities": ["rna"],
"data_is_annotated": true,
"license_info": "cc-by-4.0",
"description": "Combined single cell and single nuclei RNA-Seq data of 485K cardiac cells with annotations.",
"references": "Kazumasa Kanemaru, James Cranley, Daniele Muraro, Antonio M. A. Miranda, Siew Yen Ho, Anna Wilbrey-Clark, Jan Patrick Pett, Krzysztof Polanski, Laura Richardson, Monika Litvinukova, Natsuhiko Kumasaka, Yue Qin, Zuzanna Jablonska, Claudia I. Semprich, Lukas Mach, Monika Dabrowska, Nathan Richoz, Liam Bolt, Lira Mamanova, Rakeshlal Kapuge, Sam N. Barnett, Shani Perera, Carlos Talavera-López, Ilaria Mulas, Krishnaa T. Mahbubani, Liz Tuck, Lu Wang, Margaret M. Huang, Martin Prete, Sophie Pritchard, John Dark, Kourosh Saeb-Parsy, Minal Patel, Menna R. Clatworthy, Norbert Hübner, Rasheda A. Chowdhury, Michela Noseda & Sarah A. Teichmann. Spatially resolved multiomics of human cardiac niches. Nature, July 2023. doi:10.1038/s41586-023-06311-1."
}
}
23 changes: 23 additions & 0 deletions src/scvi_hub_models/config/human_lung_cell_atlas.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"legacy_model_url": "https://zenodo.org/records/7599104/files/HLCA_reference_model.zip",
"legacy_model_hash": "a7cd60f4342292b3cba54545bcd8a34decdc8e6b82163f009273d543e7e3910e",
"legacy_model_dir": "hlca_scanvi_reference_legacy",
"model_dir": "hlca_scanvi_reference",
"reference_adata_cxg_id": "066943a2-fdac-4b29-b348-40cede398e4e",
"reference_adata_fname": "hlca_core.h5ad",
"embedding_adata_url": "https://zenodo.org/records/7599104/files/HLCA_full_v1.1_emb.h5ad",
"embedding_adata_hash": "3e2c4da281b6883464b2a70bcc1562d1c4246de32093e7b08090673fbad56a97",
"embedding_adata_fname": "hlca_all_emb.h5ad",
"mini_model_dir": "hlca_scanvi_reference_mini",
"repo_name": "scvi-tools/human-lung-cell-atlas",
"metadata": {
"training_data_url": "https://cellxgene.cziscience.com/collections/6f6d381a-7701-4781-935c-db10d30de293",
"training_code_url": "https://github.com/LungCellAtlas/HLCA_reproducibility",
"tissues": ["nose", "respiratory airway", "lung parenchyma"],
"data_modalities": ["rna"],
"data_is_annotated": true,
"license_info": "cc-by-4.0",
"description": "The integrated Human Lung Cell Atlas (HLCA) represents the first large-scale, integrated single-cell reference atlas of the human lung.",
"references": "Lisa Sikkema, Ciro Ramírez-Suástegui, Daniel C. Strobl, Tessa E. Gillett, Luke Zappia, Elo Madissoon, Nikolay S. Markov, Laure-Emmanuelle Zaragosi, Yuge Ji, Meshal Ansari, Marie-Jeanne Arguel, Leonie Apperloo, Martin Banchero, Christophe Bécavin, Marijn Berg, Evgeny Chichelnitskiy, Mei-i Chung, Antoine Collin, Aurore C. A. Gay, Janine Gote-Schniering, Baharak Hooshiar Kashani, Kemal Inecik, Manu Jain, Theodore S. Kapellos, Tessa M. Kole, Sylvie Leroy, Christoph H. Mayr, Amanda J. Oliver, Michael von Papen, Lance Peter, Chase J. Taylor, Thomas Walzthoeni, Chuan Xu, Linh T. Bui, Carlo De Donno, Leander Dony, Alen Faiz, Minzhe Guo, Austin J. Gutierrez, Lukas Heumos, Ni Huang, Ignacio L. Ibarra, Nathan D. Jackson, Preetish Kadur Lakshminarasimha Murthy, Mohammad Lotfollahi, Tracy Tabib, Carlos Talavera-López, Kyle J. Travaglini, Anna Wilbrey-Clark, Kaylee B. Worlock, Masahiro Yoshida, Lung Biological Network Consortium, Maarten van den Berge, Yohan Bossé, Tushar J. Desai, Oliver Eickelberg, Naftali Kaminski, Mark A. Krasnow, Robert Lafyatis, Marko Z. Nikolic, Joseph E. Powell, Jayaraj Rajagopal, Mauricio Rojas, Orit Rozenblatt-Rosen, Max A. Seibold, Dean Sheppard, Douglas P. Shepherd, Don D. Sin, Wim Timens, Alexander M. Tsankov, Jeffrey Whitsett, Yan Xu, Nicholas E. Banovich, Pascal Barbry, Thu Elizabeth Duong, Christine S. Falk, Kerstin B. Meyer, Jonathan A. Kropski, Dana Pe’er, Herbert B. Schiller, Purushothama Rao Tata, Joachim L. Schultze, Sarah A. Teichmann, Alexander V. Misharin, Martijn C. Nawijn, Malte D. Luecken, and Fabian J. Theis. An integrated cell atlas of the lung in health and disease. Nature Medicine, June 2023. doi:10.1038/s41591-023-02327-2."
}
}
Loading

0 comments on commit 558e859

Please sign in to comment.