diff --git a/poetry.lock b/poetry.lock index 99bea90..f0fd797 100644 --- a/poetry.lock +++ b/poetry.lock @@ -123,17 +123,17 @@ stringcase = ">=1.2.0,<2.0.0" [[package]] name = "boto3" -version = "1.34.161" +version = "1.35.0" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.161-py3-none-any.whl", hash = "sha256:4ef285334a0edc3047e27a04caf00f7742e32c0f03a361101e768014ac5709dd"}, - {file = "boto3-1.34.161.tar.gz", hash = "sha256:a872d8fdb3203c1eb0b12fa9e9d879e6f7fd02983a485f02189e6d5914ccd834"}, + {file = "boto3-1.35.0-py3-none-any.whl", hash = "sha256:ada32dab854c46a877cf967b8a55ab1a7d356c3c87f1c8bd556d446ff03dfd95"}, + {file = "boto3-1.35.0.tar.gz", hash = "sha256:bdc242e3ea81decc6ea551b04b2c122f088c29269d8e093b55862946aa0fcfc6"}, ] [package.dependencies] -botocore = ">=1.34.161,<1.35.0" +botocore = ">=1.35.0,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -142,13 +142,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.161" +version = "1.35.0" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.161-py3-none-any.whl", hash = "sha256:6c606d2da6f62fde06880aff1190566af208875c29938b6b68741e607817975a"}, - {file = "botocore-1.34.161.tar.gz", hash = "sha256:16381bfb786142099abf170ce734b95a402a3a7f8e4016358712ac333c5568b2"}, + {file = "botocore-1.35.0-py3-none-any.whl", hash = "sha256:a3c96fe0b6afe7d00bad6ffbe73f2610953065fcdf0ed697eba4e1e5287cc84f"}, + {file = "botocore-1.35.0.tar.gz", hash = "sha256:6ab2f5a5cbdaa639599e3478c65462c6d6a10173dc8b941bfc69b0c9eb548f45"}, ] [package.dependencies] @@ -941,13 +941,13 @@ test = ["objgraph", "psutil"] [[package]] name = "griffe" -version = "0.49.0" +version = "1.0.0" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." optional = false python-versions = ">=3.8" files = [ - {file = "griffe-0.49.0-py3-none-any.whl", hash = "sha256:c0d505f2a444ac342b22f4647d6444c8db64964b6a379c14f401fc467c0741a3"}, - {file = "griffe-0.49.0.tar.gz", hash = "sha256:a7e1235c27d8139e0fd24a5258deef6061bc876a9fda8117a5cf7b53ee940a91"}, + {file = "griffe-1.0.0-py3-none-any.whl", hash = "sha256:7e113220efc489c2e8189656b1f01c1184417ba5395573ff05cdc7d07a1b0a5e"}, + {file = "griffe-1.0.0.tar.gz", hash = "sha256:a9df647e5602fc0f826fca1cda5d9a0e554cdea67eb7948f6629dca7336e6afd"}, ] [package.dependencies] @@ -1533,13 +1533,13 @@ dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptio [[package]] name = "markdown" -version = "3.6" +version = "3.7" description = "Python implementation of John Gruber's Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"}, - {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"}, + {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, + {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, ] [package.extras] @@ -2032,9 +2032,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2313,8 +2313,8 @@ files = [ annotated-types = ">=0.4.0" pydantic-core = "2.20.1" typing-extensions = [ - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, {version = ">=4.6.1", markers = "python_version < \"3.13\""}, + {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, ] [package.extras] diff --git a/src/kg_bioportal/cli.py b/src/kg_bioportal/cli.py index 57f4a26..00582a8 100644 --- a/src/kg_bioportal/cli.py +++ b/src/kg_bioportal/cli.py @@ -32,6 +32,37 @@ def main(verbose: int, quiet: bool): logger.info(f"Logger {logger.name} set to level {logger.level}") +@main.command() +@click.option("output_dir", "-o", required=True, default="data/raw") +@click.option( + "api_key", + "-k", + required=False, + type=str, + help="API key for BioPortal", +) +def get_ontology_list(output_dir, api_key) -> None: + """Downloads the list of all BioPortal ontologies and saves to a file in the data directory (default: data/raw). + + Args: + + output_dir: A string pointing to the directory to download data to. + Defaults to data/raw. + + api_key: BioPortal / NCBO API key. + + Returns: + None. + + """ + + dl = Downloader(output_dir=output_dir, api_key=api_key) + + dl.get_ontology_list() + + return None + + @main.command() @click.option( "ontologies", @@ -67,7 +98,9 @@ def main(verbose: int, quiet: bool): type=str, help="API key for BioPortal", ) -def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key) -> None: +def download( + ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key +) -> None: """Downloads specified ontologies into data directory (default: data/raw). Args: @@ -81,9 +114,11 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache, output_dir: A string pointing to the directory to download data to. Defaults to data/raw. - snippet_only: Downloads only the first 5 kB of the source, for testing and file checks. + snippet_only: (Not yet implemented) Downloads only the first 5 kB of the source, for testing and file checks. + + ignore_cache: (Not yet implemented) If specified, will ignore existing files and download again. - ignore_cache: If specified, will ignore existing files and download again. + api_key: BioPortal / NCBO API key. Returns: None. @@ -105,7 +140,12 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache, logging.info(f"{len(onto_list)} ontologies to retrieve.") - dl = Downloader(output_dir, snippet_only, ignore_cache, api_key) + dl = Downloader( + output_dir=output_dir, + snippet_only=snippet_only, + ignore_cache=ignore_cache, + api_key=api_key, + ) dl.download(onto_list) diff --git a/src/kg_bioportal/downloader.py b/src/kg_bioportal/downloader.py index 81add2a..1a626e0 100644 --- a/src/kg_bioportal/downloader.py +++ b/src/kg_bioportal/downloader.py @@ -4,9 +4,12 @@ import os import requests +ONTOLOGY_LIST_NAME = "ontologylist.tsv" class Downloader: + # TODO: implement ignore_cache and snippet_only + # Directory to save the downloaded files output_dir: str = "data/raw" @@ -87,3 +90,26 @@ def download(self, onto_list: list = []) -> None: return None + + def get_ontology_list(self) -> None: + """Get the list of ontologies from BioPortal. + + Args: + None. + + Returns: + None. + """ + headers = {"Authorization": f"apikey token={self.api_key}"} + + logging.info("Getting set of all ontologies...") + + analytics_url = "https://data.bioontology.org/analytics" + + ontologies = requests.get(analytics_url, headers=headers, allow_redirects=True).json() + + with open(f"{self.output_dir}/{ONTOLOGY_LIST_NAME}", "w") as outfile: + for name in ontologies: + outfile.write(f"{name}\n") + + logging.info(f"Wrote to {self.output_dir}/{ONTOLOGY_LIST_NAME}")