From a2c90e2d905907c5d9107af6d80e15086e773eea Mon Sep 17 00:00:00 2001 From: luissian Date: Wed, 28 Aug 2024 13:54:59 +0200 Subject: [PATCH 1/6] Fixing errors and update to allow to upload to skylims --- relecov_tools/__main__.py | 22 +-- relecov_tools/conf/configuration.json | 86 ++++----- .../{feed_database.py => upload_database.py} | 167 ++++++++++-------- 3 files changed, 151 insertions(+), 124 deletions(-) rename relecov_tools/{feed_database.py => upload_database.py} (74%) diff --git a/relecov_tools/__main__.py b/relecov_tools/__main__.py index a03ca9b0..4aa1b2ba 100755 --- a/relecov_tools/__main__.py +++ b/relecov_tools/__main__.py @@ -16,7 +16,7 @@ import relecov_tools.download_manager import relecov_tools.json_validation import relecov_tools.map_schema -import relecov_tools.feed_database +import relecov_tools.upload_database import relecov_tools.read_bioinfo_metadata import relecov_tools.metadata_homogeneizer import relecov_tools.gisaid_upload @@ -60,7 +60,7 @@ def run_relecov_tools(): ) # stderr.print("[green] `._,._,'\n", highlight=False) - __version__ = "0.0.5" + __version__ = "0.0.6" stderr.print( "\n" "[grey39] RELECOV-tools version {}".format(__version__), highlight=False ) @@ -373,7 +373,6 @@ def launch(user): pass -# update_db TODO: Include types of data and database servers in config file @relecov_tools_cli.command(help_priority=9) @click.option("-j", "--json", help="data in json format") @click.option( @@ -385,8 +384,8 @@ def launch(user): help="Select the type of information to upload to database", ) @click.option( - "-d", - "databaseServer", + "-plat", + "--platform", type=click.Choice( [ "iskylims", @@ -395,10 +394,11 @@ def launch(user): ), multiple=False, default=None, - help="name of the server which information is defined in config file", + help="name of the platform where data is uploaded", ) @click.option("-u", "--user", help="user name for login") @click.option("-p", "--password", help="password for the user to login") +@click.option("-s", "--server_url", help="url of the platform server") @click.option( "-f", "--full_update", @@ -406,12 +406,12 @@ def launch(user): default=False, help="Sequentially run every update option", ) -def update_db(user, password, json, type, databaseServer, full_update): - """feed database with json""" - feed_database = relecov_tools.feed_database.FeedDatabase( - user, password, json, type, databaseServer, full_update +def update_db(user, password, json, type, platform, server_url, full_update): + """upload the information included in json file to the database""" + update_database_obj = relecov_tools.upload_database.UpdateDatabase( + user, password, json, type, platform, server_url, full_update ) - feed_database.update_db() + update_database_obj.update_db() # read metadata bioinformatics diff --git a/relecov_tools/conf/configuration.json b/relecov_tools/conf/configuration.json index 84503966..2c4c20f7 100755 --- a/relecov_tools/conf/configuration.json +++ b/relecov_tools/conf/configuration.json @@ -270,54 +270,56 @@ "GISAID_configuration": { "submitter": "GISAID_ID" }, - "external_url": { - "iskylims": { - "server": "http://relecov-iskylims.isciiides.es", - "url": "/wetlab/api/", - "store_samples": "createSampleData", - "url_project_fields": "sampleProjectFields", - "url_sample_fields": "sampleFields", - "param_sample_project": "project", - "project_name": "relecov", - "token": "" - }, - "relecov": { - "server": "http://relecov-platform.isciiides.es", - "url": "/api/", + "upload_database": { + "platform":{ + "iskylims": { + "server_url": "http://relecov-iskylims.isciiides.es", + "api_url": "/wetlab/api/", + "store_samples": "create-sample", + "url_project_fields": "projects-fields", + "url_sample_fields": "sample-fields", + "param_sample_project": "project", + "project_name": "relecov", + "token": "" + }, + "relecov": { + "server_url": "http://relecov-platform.isciiides.es", + "api_url": "/api/", "store_samples": "createSampleData", "bioinfodata": "createBioinfoData", "variantdata": "createVariantData", "sftp_info": "sftpInfo", "token": "" - } - }, - "iskylims_fixed_values": { - "patientCore": "", - "sampleProject": "Relecov", - "onlyRecorded": "Yes", - "sampleLocation": "Not defined" + } + }, + "iskylims_fixed_values": { + "patient_core": "", + "sample_project": "Relecov", + "only_recorded": "Yes", + "sample_location": "Not defined" + }, + "relecov_sample_metadata": [ + "authors", + "collecting_institution", + "collecting_lab_sample_id", + "ena_broker_name", + "ena_sample_accession", + "gisaid_accession_id", + "gisaid_virus_name", + "microbiology_lab_sample_id", + "r1_fastq_filepath", + "r2_fastq_filepath", + "schema_name", + "schema_version", + "sequencing_date", + "sequence_file_R1_md5", + "sequence_file_R2_md5", + "sequence_file_R1_fastq", + "sequence_file_R2_fastq", + "sequencing_sample_id", + "submitting_lab_sample_id" + ] }, - "relecov_sample_metadata": [ - "authors", - "collecting_institution", - "collecting_lab_sample_id", - "ena_broker_name", - "ena_sample_accession", - "gisaid_accession_id", - "gisaid_virus_name", - "microbiology_lab_sample_id", - "r1_fastq_filepath", - "r2_fastq_filepath", - "schema_name", - "schema_version", - "sequencing_date", - "sequence_file_R1_md5", - "sequence_file_R2_md5", - "sequence_file_R1_fastq", - "sequence_file_R2_fastq", - "sequencing_sample_id", - "submitting_lab_sample_id" - ], "ENA_fields": { "ENA_configuration": { "study_alias": "RELECOV", diff --git a/relecov_tools/feed_database.py b/relecov_tools/upload_database.py similarity index 74% rename from relecov_tools/feed_database.py rename to relecov_tools/upload_database.py index 076b827d..cf54715f 100644 --- a/relecov_tools/feed_database.py +++ b/relecov_tools/upload_database.py @@ -22,16 +22,18 @@ ) -class FeedDatabase: +class UpdateDatabase: def __init__( self, user=None, passwd=None, json_file=None, type_of_info=None, - database_server=None, - full_update=None, + platform=None, + server_url=None, + full_update=False, ): + # Get the user and password for the database if user is None: user = relecov_tools.utils.prompt_text( msg="Enter username for upload data to server" @@ -40,6 +42,7 @@ def __init__( if passwd is None: passwd = relecov_tools.utils.prompt_text(msg="Enter credential password") self.passwd = passwd + # get the default coonfiguration used the instance self.config_json = ConfigJson() if json_file is None: json_file = relecov_tools.utils.prompt_path( @@ -57,23 +60,36 @@ def __init__( self.config_json.get_topic_data("json_schemas", "relecov_schema"), ) self.schema = relecov_tools.utils.read_json_file(schema) - self.full_update = full_update - # TODO: Include types_of_data and database_servers as config fields - self.types_of_data = ["sample", "bioinfodata", "variantdata"] - self.db_servers_names = ["iskylims", "relecov"] - if not full_update: + if full_update is True: + self.full_update = True + self.server_url = None + else: + self.full_update = False if type_of_info is None: type_of_info = relecov_tools.utils.prompt_selection( - "Select type of data to upload:", - self.types_of_data, + "Select:", + ["sample", "bioinfodata", "variantdata"], ) - if database_server is None: - database_server = relecov_tools.utils.prompt_selection( - "Select target database server:", - self.db_servers_names, + self.type_of_info = type_of_info + # collect data for plarform to upload data + if platform is None: + platform = relecov_tools.utils.prompt_selection( + "Select:", + ["iskylims", "relecov"], ) - self.server_name = database_server - self.type_of_info = type_of_info + self.platform = platform + if server_url is None: + self.server_url = server_url + # Get configuration settings for upload database + try: + self.platform_settings = self.config_json.get_topic_data( + "upload_database", "platform" + ) + except KeyError as e: + logtxt = f"Unable to fetch parameters for {platform} {e}" + stderr.print(f"[red]{logtxt}") + log.error(logtxt) + sys.exit(1) json_dir = os.path.dirname(os.path.realpath(self.json_file)) lab_code = json_dir.split("/")[-2] @@ -111,12 +127,26 @@ def map_iskylims_sample_fields_values(self, sample_fields, s_project_fields): # be included in iSkyLIMS request log.info("not key %s in iSkyLIMS", key) # include the fixed value - fixed_value = self.config_json.get_configuration("iskylims_fixed_values") + fixed_value = self.config_json.get_topic_data( + "upload_database", "iskylims_fixed_values" + ) for prop, val in fixed_value.items(): s_dict[prop] = val - # Adding tha specimen_source field to set sampleType - s_dict["sampleType"] = row["specimen_source"] + # Adding tha specimen_source field to set sample_type + try: + s_dict["sample_type"] = row["specimen_source"] + except KeyError as e: + logtxt = f"Unable to fetch specimen_source from json file {e}" + self.logsum.add_warning(entry=logtxt) + s_dict["sample_type"] = "Other" sample_list.append(s_dict) + # if sample_entry_date is not set then, add the current date + if "sample_entry_date" not in row: + logtxt = "sample_entry_date is not in the sample fields" + self.logsum.add_warning(entry=logtxt) + stderr.print(f"[yellow]{logtxt}") + s_dict["sample_entry_date"] = time.strftime("%Y-%m-%d") + return sample_list def get_iskylims_fields_sample(self): @@ -130,20 +160,15 @@ def get_iskylims_fields_sample(self): s_project_fields = [] # get the ontology values for mapping values in sample fields ontology_dict = self.get_schema_ontology_values() - sample_url = self.database_settings["url_sample_fields"] - try: - sample_fields_raw = self.database_rest_api.get_request(sample_url, "", "") - except AttributeError: - logtxt = f"Unable to connect to {self.db_server} server" - self.logsum.add_error(entry=logtxt) - stderr.print(f"[red]{logtxt}") - return + sample_url = self.platform_settings["iskylims"]["url_sample_fields"] + sample_fields_raw = self.platform_rest_api.get_request(sample_url, "", "") + if "ERROR" in sample_fields_raw: - logtxt1 = f"Unable to fetch data from {self.db_server}." + logtxt1 = f"Unable to fetch data from {self.platform}." logtxt2 = f" Received error {sample_fields_raw['ERROR']}" self.logsum.add_error(entry=str(logtxt1 + logtxt2)) stderr.print(f"[red]{logtxt1 + logtxt2}") - return + sys.exit(1) for _, values in sample_fields_raw["DATA"].items(): if "ontology" in values: @@ -162,19 +187,22 @@ def get_iskylims_fields_sample(self): self.logsum.add_warning(entry=logtxt) log.info(logtxt) # fetch label for sample Project - s_project_url = self.database_settings["url_project_fields"] - param = self.database_settings["param_sample_project"] - p_name = self.database_settings["project_name"] - s_project_fields_raw = self.database_rest_api.get_request( + s_project_url = self.platform_settings["iskylims"]["url_project_fields"] + param = self.platform_settings["iskylims"]["param_sample_project"] + p_name = self.platform_settings["iskylims"]["project_name"] + s_project_fields_raw = self.platform_rest_api.get_request( s_project_url, param, p_name ) if "ERROR" in s_project_fields_raw: - logtxt1 = f"Unable to fetch data from {self.db_server}." + logtxt1 = f"Unable to fetch data from {self.platform}." logtxt2 = f" Received error {s_project_fields_raw['ERROR']}" self.logsum.add_error(entry=str(logtxt1 + logtxt2)) return + else: + log.info("Fetched sample project fields from iSkyLIMS") + stderr.print("[blue] Fetched sample project fields from iSkyLIMS") for field in s_project_fields_raw["DATA"]: - s_project_fields.append(field["sampleProjectFieldName"]) + s_project_fields.append(field["sample_project_field_name"]) return [sample_fields, s_project_fields] def map_relecov_sample_data(self): @@ -194,12 +222,12 @@ def map_relecov_sample_data(self): def update_database(self, field_values, post_url): """Send the request to update database""" + post_url = self.platform_settings[self.platform][post_url] suces_count = 0 request_count = 0 for chunk in field_values: req_sample = "" request_count += 1 - # TODO: Include these fields in config file if "sample_name" in chunk: stderr.print( f"[blue] sending request for sample {chunk['sample_name']}" @@ -211,10 +239,10 @@ def update_database(self, field_values, post_url): ) req_sample = chunk["sequencing_sample_id"] self.logsum.feed_key(sample=req_sample) - result = self.database_rest_api.post_request( + result = self.platform_rest_api.post_request( json.dumps(chunk), {"user": self.user, "pass": self.passwd}, - self.database_settings[post_url], + post_url, ) if "ERROR" in result: if result["ERROR"] == "Server not available": @@ -222,44 +250,45 @@ def update_database(self, field_values, post_url): for i in range(10): # wait 5 sec before resending the request time.sleep(5) - result = self.database_rest_api.post_request( + result = self.platform_rest_api.post_request( json.dumps(chunk), {"user": self.user, "pass": self.passwd}, - self.database_settings[post_url], + self.platform_settings[post_url], ) if "ERROR" not in result: break if i == 9 and "ERROR" in result: - logtxt = f"Unable to sent the request to {self.db_server}" + logtxt = f"Unable to sent the request to {self.platform}" self.logsum.add_error(entry=logtxt, sample=req_sample) stderr.print(f"[red]{logtxt}") continue elif "is not defined" in result["ERROR_TEST"].lower(): - logtxt = f"{req_sample} is not defined in {self.db_server}" + error_txt = result["ERROR_TEST"] + logtxt = f"Sample {req_sample}: {error_txt}" self.logsum.add_error(entry=logtxt, sample=req_sample) stderr.print(f"[yellow]Warning: {logtxt}") continue elif "already defined" in result["ERROR_TEST"].lower(): - logtxt = f"Request to {self.db_server} already defined" + logtxt = f"Request to {self.platform} already defined" self.logsum.add_warning(entry=logtxt, sample=req_sample) stderr.print(f"[yellow]{logtxt} for sample {req_sample}") continue else: - logtxt = f"Error {result['ERROR']} in request to {self.db_server}" + logtxt = f"Error {result['ERROR']} in request to {self.platform}" self.logsum.add_error(entry=logtxt, sample=req_sample) stderr.print(f"[red]{logtxt}") continue log.info( "stored data in %s iskylims for sample %s", - self.db_server, + self.platform, req_sample, ) stderr.print(f"[green] Successful request for {req_sample}") suces_count += 1 if request_count == suces_count: stderr.print( - f"All {self.type_of_info} data sent sucessfuly to {self.db_server}" + f"All {self.type_of_info} data sent sucessfuly to {self.platform}" ) else: logtxt = "%s of the %s requests were sent to %s" @@ -275,16 +304,16 @@ def store_data(self, type_of_info, server_name): """Collect data from json file and split them to store data in iSkyLIMS and in Relecov Platform """ + map_fields = {} - map_fields = {} # - # TODO: Include all these hard-coded fields in config file - if type_of_info not in self.types_of_info: + """ if type_of_info not in self.types_of_info: self.logsum.add_error(entry=f"Invalid datatype {type_of_info} to upload") stderr.print(f"[red]Invalid datatype {type_of_info} to upload") - return + return """ if type_of_info == "sample": if server_name == "iskylims": stderr.print(f"[blue] Getting sample fields from {server_name}") + sample_fields, s_project_fields = self.get_iskylims_fields_sample() stderr.print("[blue] Selecting sample fields") map_fields = self.map_iskylims_sample_fields_values( @@ -304,29 +333,25 @@ def store_data(self, type_of_info, server_name): map_fields = self.json_data self.update_database(map_fields, post_url) - stderr.print(f"[green]Upload process to {self.server_name} completed") + stderr.print(f"[green]Upload process to {self.platform} completed") - def start_api(self, database_server): + def start_api(self, platform): """Open connection torwards database server API""" # Get database settings - if database_server: - try: - self.database_settings = self.config_json.get_topic_data( - "external_url", database_server - ) - except KeyError: - logtxt = f"Unable to fetch parameters for {database_server}" - self.logsum.add_error(entry=logtxt) - stderr.print(f"[red]{logtxt}") - return - self.db_server = self.database_settings["server"] - self.db_url = self.database_settings["url"] - self.db_rest_api = RestApi(self.db_server, self.db_url) - else: - logtxt = f"No database server was selected for {self.type_of_info}. Skipped" - self.logsum.add_error(entry=logtxt) + try: + p_settings = self.platform_settings[platform] + except KeyError as e: + logtxt = f"Unable to fetch parameters for {platform} {e}" stderr.print(f"[red]{logtxt}") - return + log.error(logtxt) + sys.exit(1) + if self.server_url is None: + server_url = p_settings["server_url"] + else: + server_url = self.server_url + self.platform = platform + self.api_url = p_settings["api_url"] + self.platform_rest_api = RestApi(server_url, self.api_url) return def update_db(self): @@ -358,6 +383,6 @@ def update_db(self): self.json_data = relecov_tools.utils.read_json_file(json_file) self.store_data(datatype, self.server_name) else: - self.start_api(self.server_name) - self.store_data(self.type_of_info, self.server_name) + self.start_api(self.platform) + self.store_data(self.type_of_info, self.platform) self.logsum.create_error_summary(called_module="update-db") From dd631e11abff7d2ecb5db07d5f9281b303dcf138 Mon Sep 17 00:00:00 2001 From: luissian Date: Thu, 29 Aug 2024 11:02:24 +0200 Subject: [PATCH 2/6] fixing errors --- relecov_tools/upload_database.py | 52 +++++++++++++++----------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/relecov_tools/upload_database.py b/relecov_tools/upload_database.py index cf54715f..25b168d0 100644 --- a/relecov_tools/upload_database.py +++ b/relecov_tools/upload_database.py @@ -11,7 +11,6 @@ import relecov_tools.utils from relecov_tools.config_json import ConfigJson from relecov_tools.rest_api import RestApi -from relecov_tools.log_summary import LogSum log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -93,9 +92,6 @@ def __init__( json_dir = os.path.dirname(os.path.realpath(self.json_file)) lab_code = json_dir.split("/")[-2] - self.logsum = LogSum( - output_location=json_dir, unique_key=lab_code, path=json_dir - ) def get_schema_ontology_values(self): """Read the schema and extract the values of ontology with the label""" @@ -137,13 +133,13 @@ def map_iskylims_sample_fields_values(self, sample_fields, s_project_fields): s_dict["sample_type"] = row["specimen_source"] except KeyError as e: logtxt = f"Unable to fetch specimen_source from json file {e}" - self.logsum.add_warning(entry=logtxt) + log.warning(logtxt) s_dict["sample_type"] = "Other" sample_list.append(s_dict) # if sample_entry_date is not set then, add the current date if "sample_entry_date" not in row: logtxt = "sample_entry_date is not in the sample fields" - self.logsum.add_warning(entry=logtxt) + log.warning(logtxt) stderr.print(f"[yellow]{logtxt}") s_dict["sample_entry_date"] = time.strftime("%Y-%m-%d") @@ -166,7 +162,7 @@ def get_iskylims_fields_sample(self): if "ERROR" in sample_fields_raw: logtxt1 = f"Unable to fetch data from {self.platform}." logtxt2 = f" Received error {sample_fields_raw['ERROR']}" - self.logsum.add_error(entry=str(logtxt1 + logtxt2)) + log.error(str(logtxt1 + logtxt2)) stderr.print(f"[red]{logtxt1 + logtxt2}") sys.exit(1) @@ -178,13 +174,12 @@ def get_iskylims_fields_sample(self): # the field name for the sample sample_fields[property] = values["field_name"] except KeyError as e: - self.logsum.add_warning(entry=f"Error mapping ontology {e}") - stderr.print(f"[red]Error mapping ontology {e}") + log.info(f"Error mapping ontology {e}") + # stderr.print(f"[red]Error mapping ontology {e}") else: # for the ones that do not have ontology label in the sample field # and have an empty value: sample_fields[key] = "" logtxt = f"No ontology found for {values.get('field_name')}" - self.logsum.add_warning(entry=logtxt) log.info(logtxt) # fetch label for sample Project s_project_url = self.platform_settings["iskylims"]["url_project_fields"] @@ -196,7 +191,7 @@ def get_iskylims_fields_sample(self): if "ERROR" in s_project_fields_raw: logtxt1 = f"Unable to fetch data from {self.platform}." logtxt2 = f" Received error {s_project_fields_raw['ERROR']}" - self.logsum.add_error(entry=str(logtxt1 + logtxt2)) + log.error(str(logtxt1 + logtxt2)) return else: log.info("Fetched sample project fields from iSkyLIMS") @@ -208,7 +203,12 @@ def get_iskylims_fields_sample(self): def map_relecov_sample_data(self): """Select the values from self.json_data""" field_values = [] - r_fields = self.config_json.get_configuration("relecov_sample_metadata") + import pdb + + pdb.set_trace() + r_fields = self.config_json.get_topic_data( + "upload_database", "relecov_sample_metadata" + ) for row in self.json_data: s_dict = {} @@ -238,7 +238,6 @@ def update_database(self, field_values, post_url): f"[blue] sending request for sample {chunk['sequencing_sample_id']}" ) req_sample = chunk["sequencing_sample_id"] - self.logsum.feed_key(sample=req_sample) result = self.platform_rest_api.post_request( json.dumps(chunk), {"user": self.user, "pass": self.passwd}, @@ -259,24 +258,24 @@ def update_database(self, field_values, post_url): break if i == 9 and "ERROR" in result: logtxt = f"Unable to sent the request to {self.platform}" - self.logsum.add_error(entry=logtxt, sample=req_sample) + log.error(logtxt) stderr.print(f"[red]{logtxt}") continue elif "is not defined" in result["ERROR_TEST"].lower(): error_txt = result["ERROR_TEST"] logtxt = f"Sample {req_sample}: {error_txt}" - self.logsum.add_error(entry=logtxt, sample=req_sample) + log.error(logtxt) stderr.print(f"[yellow]Warning: {logtxt}") continue elif "already defined" in result["ERROR_TEST"].lower(): logtxt = f"Request to {self.platform} already defined" - self.logsum.add_warning(entry=logtxt, sample=req_sample) + log.warning(logtxt) stderr.print(f"[yellow]{logtxt} for sample {req_sample}") continue else: logtxt = f"Error {result['ERROR']} in request to {self.platform}" - self.logsum.add_error(entry=logtxt, sample=req_sample) + log.error(logtxt) stderr.print(f"[red]{logtxt}") continue log.info( @@ -291,12 +290,14 @@ def update_database(self, field_values, post_url): f"All {self.type_of_info} data sent sucessfuly to {self.platform}" ) else: - logtxt = "%s of the %s requests were sent to %s" - self.logsum.add_warning( - entry=logtxt % (suces_count, request_count, self.server_name) + log.warning( + "%s of the %s requests were sent to %s", + suces_count, + request_count, + self.platform, ) stderr.print( - f"[yellow]{logtxt % (suces_count, request_count, self.server_name)}" + f"[yellow]logtxt % {suces_count} {request_count} {self.platform})" ) return @@ -306,15 +307,12 @@ def store_data(self, type_of_info, server_name): """ map_fields = {} - """ if type_of_info not in self.types_of_info: - self.logsum.add_error(entry=f"Invalid datatype {type_of_info} to upload") - stderr.print(f"[red]Invalid datatype {type_of_info} to upload") - return """ if type_of_info == "sample": if server_name == "iskylims": + log.info("Getting sample fields from %s", server_name) stderr.print(f"[blue] Getting sample fields from {server_name}") - sample_fields, s_project_fields = self.get_iskylims_fields_sample() + log.info("Selecting sample fields") stderr.print("[blue] Selecting sample fields") map_fields = self.map_iskylims_sample_fields_values( sample_fields, s_project_fields @@ -385,4 +383,4 @@ def update_db(self): else: self.start_api(self.platform) self.store_data(self.type_of_info, self.platform) - self.logsum.create_error_summary(called_module="update-db") + return From 1498fac08f17399eeb7ffe2e5a812133c77d6cd6 Mon Sep 17 00:00:00 2001 From: luissian Date: Thu, 29 Aug 2024 11:04:48 +0200 Subject: [PATCH 3/6] fixing litin --- relecov_tools/upload_database.py | 1 - 1 file changed, 1 deletion(-) diff --git a/relecov_tools/upload_database.py b/relecov_tools/upload_database.py index 25b168d0..e7b76039 100644 --- a/relecov_tools/upload_database.py +++ b/relecov_tools/upload_database.py @@ -91,7 +91,6 @@ def __init__( sys.exit(1) json_dir = os.path.dirname(os.path.realpath(self.json_file)) - lab_code = json_dir.split("/")[-2] def get_schema_ontology_values(self): """Read the schema and extract the values of ontology with the label""" From 3bd13099219b51d235335b3a7d390d510313586f Mon Sep 17 00:00:00 2001 From: luissian Date: Thu, 29 Aug 2024 11:07:26 +0200 Subject: [PATCH 4/6] remove variable does not used --- relecov_tools/upload_database.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/relecov_tools/upload_database.py b/relecov_tools/upload_database.py index e7b76039..26f61f8f 100644 --- a/relecov_tools/upload_database.py +++ b/relecov_tools/upload_database.py @@ -90,8 +90,6 @@ def __init__( log.error(logtxt) sys.exit(1) - json_dir = os.path.dirname(os.path.realpath(self.json_file)) - def get_schema_ontology_values(self): """Read the schema and extract the values of ontology with the label""" ontology_dict = {} From af598233c028b441376b0aef65e021c8df58644b Mon Sep 17 00:00:00 2001 From: luissian Date: Thu, 29 Aug 2024 16:08:24 +0200 Subject: [PATCH 5/6] defined back the log summary file --- relecov_tools/upload_database.py | 39 +++++++++++++++++--------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/relecov_tools/upload_database.py b/relecov_tools/upload_database.py index 26f61f8f..29145a5c 100644 --- a/relecov_tools/upload_database.py +++ b/relecov_tools/upload_database.py @@ -11,6 +11,7 @@ import relecov_tools.utils from relecov_tools.config_json import ConfigJson from relecov_tools.rest_api import RestApi +from relecov_tools.log_summary import LogSum log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -89,6 +90,12 @@ def __init__( stderr.print(f"[red]{logtxt}") log.error(logtxt) sys.exit(1) + # create the instance for logging the summary information + json_dir = os.path.dirname(os.path.realpath(self.json_file)) + lab_code = json_dir.split("/")[-2] + self.logsum = LogSum( + output_location=json_dir, unique_key=lab_code, path=json_dir + ) def get_schema_ontology_values(self): """Read the schema and extract the values of ontology with the label""" @@ -118,7 +125,7 @@ def map_iskylims_sample_fields_values(self, sample_fields, s_project_fields): if key not in s_project_fields and key not in s_fields: # just for debugging, write the fields that will not # be included in iSkyLIMS request - log.info("not key %s in iSkyLIMS", key) + log.debug("not key %s in iSkyLIMS", key) # include the fixed value fixed_value = self.config_json.get_topic_data( "upload_database", "iskylims_fixed_values" @@ -130,13 +137,13 @@ def map_iskylims_sample_fields_values(self, sample_fields, s_project_fields): s_dict["sample_type"] = row["specimen_source"] except KeyError as e: logtxt = f"Unable to fetch specimen_source from json file {e}" - log.warning(logtxt) + self.logsum.add_warning(entry=logtxt) s_dict["sample_type"] = "Other" sample_list.append(s_dict) # if sample_entry_date is not set then, add the current date if "sample_entry_date" not in row: logtxt = "sample_entry_date is not in the sample fields" - log.warning(logtxt) + self.logsum.add_warning(entry=logtxt) stderr.print(f"[yellow]{logtxt}") s_dict["sample_entry_date"] = time.strftime("%Y-%m-%d") @@ -159,7 +166,7 @@ def get_iskylims_fields_sample(self): if "ERROR" in sample_fields_raw: logtxt1 = f"Unable to fetch data from {self.platform}." logtxt2 = f" Received error {sample_fields_raw['ERROR']}" - log.error(str(logtxt1 + logtxt2)) + self.logsum.add_error(entry=str(logtxt1 + logtxt2)) stderr.print(f"[red]{logtxt1 + logtxt2}") sys.exit(1) @@ -188,7 +195,7 @@ def get_iskylims_fields_sample(self): if "ERROR" in s_project_fields_raw: logtxt1 = f"Unable to fetch data from {self.platform}." logtxt2 = f" Received error {s_project_fields_raw['ERROR']}" - log.error(str(logtxt1 + logtxt2)) + self.logsum.add_error(entry=str(logtxt1 + logtxt2)) return else: log.info("Fetched sample project fields from iSkyLIMS") @@ -200,9 +207,6 @@ def get_iskylims_fields_sample(self): def map_relecov_sample_data(self): """Select the values from self.json_data""" field_values = [] - import pdb - - pdb.set_trace() r_fields = self.config_json.get_topic_data( "upload_database", "relecov_sample_metadata" ) @@ -255,24 +259,24 @@ def update_database(self, field_values, post_url): break if i == 9 and "ERROR" in result: logtxt = f"Unable to sent the request to {self.platform}" - log.error(logtxt) + self.logsum.add_error(entry=logtxt) stderr.print(f"[red]{logtxt}") continue elif "is not defined" in result["ERROR_TEST"].lower(): error_txt = result["ERROR_TEST"] logtxt = f"Sample {req_sample}: {error_txt}" - log.error(logtxt) + self.logsum.add_error(entry=logtxt) stderr.print(f"[yellow]Warning: {logtxt}") continue elif "already defined" in result["ERROR_TEST"].lower(): logtxt = f"Request to {self.platform} already defined" - log.warning(logtxt) + self.logsum.add_warning(entry=logtxt) stderr.print(f"[yellow]{logtxt} for sample {req_sample}") continue else: logtxt = f"Error {result['ERROR']} in request to {self.platform}" - log.error(logtxt) + self.logsum.add_error(entry=logtxt) stderr.print(f"[red]{logtxt}") continue log.info( @@ -287,11 +291,9 @@ def update_database(self, field_values, post_url): f"All {self.type_of_info} data sent sucessfuly to {self.platform}" ) else: - log.warning( - "%s of the %s requests were sent to %s", - suces_count, - request_count, - self.platform, + logtxt = "%s of the %s requests were sent to %s" + self.logsum.add_warning( + entry=logtxt % (suces_count, request_count, self.platform) ) stderr.print( f"[yellow]logtxt % {suces_count} {request_count} {self.platform})" @@ -338,7 +340,7 @@ def start_api(self, platform): except KeyError as e: logtxt = f"Unable to fetch parameters for {platform} {e}" stderr.print(f"[red]{logtxt}") - log.error(logtxt) + self.logsum.add_error(entry=logtxt) sys.exit(1) if self.server_url is None: server_url = p_settings["server_url"] @@ -380,4 +382,5 @@ def update_db(self): else: self.start_api(self.platform) self.store_data(self.type_of_info, self.platform) + self.logsum.create_error_summary(called_module="update-db") return From 24e86b0ca73c97f01fbafbb5794ec3235a7e085d Mon Sep 17 00:00:00 2001 From: luissian Date: Fri, 30 Aug 2024 10:02:12 +0200 Subject: [PATCH 6/6] updated log summary comments from the PR review --- relecov_tools/upload_database.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/relecov_tools/upload_database.py b/relecov_tools/upload_database.py index 29145a5c..4c41681b 100644 --- a/relecov_tools/upload_database.py +++ b/relecov_tools/upload_database.py @@ -239,6 +239,7 @@ def update_database(self, field_values, post_url): f"[blue] sending request for sample {chunk['sequencing_sample_id']}" ) req_sample = chunk["sequencing_sample_id"] + self.logsum.feed_key(sample=req_sample) result = self.platform_rest_api.post_request( json.dumps(chunk), {"user": self.user, "pass": self.passwd}, @@ -259,24 +260,24 @@ def update_database(self, field_values, post_url): break if i == 9 and "ERROR" in result: logtxt = f"Unable to sent the request to {self.platform}" - self.logsum.add_error(entry=logtxt) + self.logsum.add_error(entry=logtxt, sample=req_sample) stderr.print(f"[red]{logtxt}") continue elif "is not defined" in result["ERROR_TEST"].lower(): error_txt = result["ERROR_TEST"] logtxt = f"Sample {req_sample}: {error_txt}" - self.logsum.add_error(entry=logtxt) + self.logsum.add_error(entry=logtxt, sample=req_sample) stderr.print(f"[yellow]Warning: {logtxt}") continue elif "already defined" in result["ERROR_TEST"].lower(): logtxt = f"Request to {self.platform} already defined" - self.logsum.add_warning(entry=logtxt) + self.logsum.add_warning(entry=logtxt, sample=req_sample) stderr.print(f"[yellow]{logtxt} for sample {req_sample}") continue else: logtxt = f"Error {result['ERROR']} in request to {self.platform}" - self.logsum.add_error(entry=logtxt) + self.logsum.add_error(entry=logtxt, sample=req_sample) stderr.print(f"[red]{logtxt}") continue log.info( @@ -293,7 +294,8 @@ def update_database(self, field_values, post_url): else: logtxt = "%s of the %s requests were sent to %s" self.logsum.add_warning( - entry=logtxt % (suces_count, request_count, self.platform) + entry=logtxt % (suces_count, request_count, self.platform), + sample=req_sample, ) stderr.print( f"[yellow]logtxt % {suces_count} {request_count} {self.platform})"