From 8b66be3e96f61dc1be4fb14fbc7bbf3137f2aa7a Mon Sep 17 00:00:00 2001 From: Sean Kim <33474168+seankim658@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:41:18 -0400 Subject: [PATCH] add additional indexes (#30) --- id/helpers/misc_functions.py | 25 ++++++++++++++++++++----- id/load_data.py | 17 ++++++++++++++++- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/id/helpers/misc_functions.py b/id/helpers/misc_functions.py index cf294e8..d29dedd 100644 --- a/id/helpers/misc_functions.py +++ b/id/helpers/misc_functions.py @@ -159,7 +159,12 @@ def create_connection_string( def setup_index( - dbh, index_col: str, collection_name: str, index_name: str = "" + dbh, + index_col: str, + collection_name: str, + index_name: str = "", + unique: bool = True, + order: int = 1, ) -> None: """Sets up an index on the specified index_name in the specified collection. @@ -173,13 +178,22 @@ def setup_index( The name of the collection to create the index in. index_name: str (default = f'{index_col}_1') The name of the index to create. + unique : bool (default = True) + Whether the index should be a unique index. + order : int (default = 1) + The sort order of the index (1 for ascending, -1 for descending). """ if not index_name: - index_name = f"{index_col}_1" + index_name = f"{index_col}_{order}" if index_name not in dbh[collection_name].index_information(): - dbh[collection_name].create_index( - [(index_col, pymongo.ASCENDING)], name=index_name, unique=True - ) + if order == 1: + dbh[collection_name].create_index( + [(index_col, pymongo.ASCENDING)], name=index_name, unique=unique + ) + elif order == -1: + dbh[collection_name].create_index( + [(index_col, pymongo.DESCENDING)], name=index_name, unique=unique + ) logging.info( f"Created index {index_name} on {index_col} in {collection_name} collection." ) @@ -384,6 +398,7 @@ def get_user_confirmation() -> None: else: print("Please enter 'y' for yes or 'n' for no.") + def preprocess_checks(data: list) -> bool: """Performs preprocessing checks on the data by ensuring ID format is valid and collision key is present (essentially chekcing that the diff --git a/id/load_data.py b/id/load_data.py index 954478d..b03fc85 100644 --- a/id/load_data.py +++ b/id/load_data.py @@ -302,8 +302,23 @@ def main(): misc_fns.setup_logging(f"./logs/load_data_{server}.log") logging.info(f"Loading data for server: {server}. #####################") - ### setup first run biomarker_id index + ### setup first indexes + paths = [ + "biomarker_component.biomarker", + "biomarker_component.assessed_biomarker_entity.recommended_name", + "biomarker_component.assessed_biomarker_entity_id", + "biomarker_component.assessed_entity_type", + "condition.recommended_name.name", + "best_biomarker_role.role", + ] misc_fns.setup_index(dbh, "biomarker_id", data_collection, "biomarker_id_1") + for path in paths: + misc_fns.setup_index( + dbh, path, data_collection, f"{path}_1", unique=False, order=1 + ) + misc_fns.setup_index( + dbh, path, data_collection, f"{path}_-1", unique=False, order=-1 + ) ### load the load map try: