Skip to content

Commit

Permalink
Merge pull request #52 from ncbo/restructure_2
Browse files Browse the repository at this point in the history
Restructuring - get all ontologies
  • Loading branch information
caufieldjh authored Aug 16, 2024
2 parents f99f783 + 136c23a commit b323dd5
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 19 deletions.
30 changes: 15 additions & 15 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 44 additions & 4 deletions src/kg_bioportal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,37 @@ def main(verbose: int, quiet: bool):
logger.info(f"Logger {logger.name} set to level {logger.level}")


@main.command()
@click.option("output_dir", "-o", default="data/raw", required=True)
@click.option(
    "api_key",
    "-k",
    type=str,
    required=False,
    help="API key for BioPortal",
)
def get_ontology_list(output_dir, api_key) -> None:
    """Downloads the list of all BioPortal ontologies and saves to a file in the data directory (default: data/raw).

    Args:

        output_dir: A string pointing to the directory to download data to.
        Defaults to data/raw.

        api_key: BioPortal / NCBO API key.

    Returns:

        None.

    """
    # All of the work happens in the Downloader; this command only forwards
    # the two command-line options to it.
    downloader = Downloader(output_dir=output_dir, api_key=api_key)
    downloader.get_ontology_list()


@main.command()
@click.option(
"ontologies",
Expand Down Expand Up @@ -67,7 +98,9 @@ def main(verbose: int, quiet: bool):
type=str,
help="API key for BioPortal",
)
def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key) -> None:
def download(
ontologies, ontology_file, output_dir, snippet_only, ignore_cache, api_key
) -> None:
"""Downloads specified ontologies into data directory (default: data/raw).
Args:
Expand All @@ -81,9 +114,11 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,
output_dir: A string pointing to the directory to download data to.
Defaults to data/raw.
snippet_only: Downloads only the first 5 kB of the source, for testing and file checks.
snippet_only: (Not yet implemented) Downloads only the first 5 kB of the source, for testing and file checks.
ignore_cache: (Not yet implemented) If specified, will ignore existing files and download again.
ignore_cache: If specified, will ignore existing files and download again.
api_key: BioPortal / NCBO API key.
Returns:
None.
Expand All @@ -105,7 +140,12 @@ def download(ontologies, ontology_file, output_dir, snippet_only, ignore_cache,

logging.info(f"{len(onto_list)} ontologies to retrieve.")

dl = Downloader(output_dir, snippet_only, ignore_cache, api_key)
dl = Downloader(
output_dir=output_dir,
snippet_only=snippet_only,
ignore_cache=ignore_cache,
api_key=api_key,
)

dl.download(onto_list)

Expand Down
26 changes: 26 additions & 0 deletions src/kg_bioportal/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
import os
import requests

ONTOLOGY_LIST_NAME = "ontologylist.tsv"

class Downloader:

# TODO: implement ignore_cache and snippet_only

# Directory to save the downloaded files
output_dir: str = "data/raw"

Expand Down Expand Up @@ -87,3 +90,26 @@ def download(self, onto_list: list = []) -> None:


return None

def get_ontology_list(self) -> None:
    """Get the list of ontologies from BioPortal and write it to a file.

    Requests the BioPortal analytics endpoint using ``self.api_key`` and
    writes one entry per line to ``ONTOLOGY_LIST_NAME`` inside
    ``self.output_dir``.

    Args:
        None.

    Returns:
        None.

    Raises:
        requests.HTTPError: If BioPortal answers with an error status
            (for example, when the API key is missing or invalid).
        requests.RequestException: On connection problems or timeout.
    """
    headers = {"Authorization": f"apikey token={self.api_key}"}
    analytics_url = "https://data.bioontology.org/analytics"
    # Seconds to wait for the server; without a timeout requests may block
    # indefinitely on an unresponsive connection.
    request_timeout = 60

    logging.info("Getting set of all ontologies...")

    response = requests.get(
        analytics_url,
        headers=headers,
        allow_redirects=True,
        timeout=request_timeout,
    )
    # Fail loudly on HTTP errors: otherwise the JSON error body would be
    # iterated below and its keys written out as if they were ontologies.
    response.raise_for_status()
    ontologies = response.json()

    # Make sure the target directory is there before opening the file.
    if self.output_dir:
        os.makedirs(self.output_dir, exist_ok=True)

    outpath = f"{self.output_dir}/{ONTOLOGY_LIST_NAME}"
    with open(outpath, "w", encoding="utf-8") as outfile:
        # Iterating the decoded payload yields its top-level entries
        # (presumably ontology acronyms as mapping keys - verify against
        # the BioPortal analytics response format).
        outfile.writelines(f"{name}\n" for name in ontologies)

    logging.info(f"Wrote to {outpath}")

0 comments on commit b323dd5

Please sign in to comment.