Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Now json_summary custom log filename includes module called #278

Merged
merged 5 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions relecov_tools/log_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import json
import os
import inspect
from rich.console import Console
from datetime import datetime
from collections import OrderedDict
Expand Down Expand Up @@ -86,7 +87,7 @@ def update_summary(self, key, log_type, entry, sample=None):
self.logs[current_key]["samples"][sample][log_type].append(entry)
return

def create_error_summary(self, filename=None):
def create_error_summary(self, called_module=None, filename=None):
"""Dump the log summary dictionary into a file with json format. If any of
the 'errors' key is not empty, the parent key value 'valid' is set to false.

Expand All @@ -100,8 +101,16 @@ def create_error_summary(self, filename=None):
for sample in self.logs[key]["samples"].keys():
if self.logs[key]["samples"][sample]["errors"]:
self.logs[key]["samples"][sample]["valid"] = False
if not called_module:
try:
called_module = [
f.function for f in inspect.stack() if "__main__.py" in f.filename
][0]
except IndexError:
called_module = ""
if not filename:
filename = datetime.today().strftime("%Y%m%d%-H%M%S") + "_log_summary.json"
date = datetime.today().strftime("%Y%m%d%-H%M%S")
filename = "_".join([date, called_module, "log_summary.json"])
summary_path = os.path.join(self.output_location, filename)
with open(summary_path, "w", encoding="utf-8") as f:
f.write(json.dumps(self.logs, indent=4, sort_keys=True, ensure_ascii=False))
Expand Down
11 changes: 6 additions & 5 deletions relecov_tools/read_lab_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ def process_from_json(self, m_data, json_fields):
)
self.logsum.add_warning(key=sample_id, entry=log_text)
continue
# TODO: Include Not Provided as a configuration field
fields_to_add = {
x: "Not Provided [GENEPIO:0001668]"
for x in json_fields["adding_fields"]
Expand Down Expand Up @@ -304,10 +305,10 @@ def read_metadata_file(self):
dtime.strptime(str(row[key]), "%Y-%m-%d").date()
)
except ValueError:
log_text = f"Invalid date format in sample {str(key)}"
log_text = f"Invalid date format in {key}: {row[key]}"
self.logsum.add_error(sample_id, log_text)
stderr.print(f"[red]{log_text}")
row[key] = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the validation process check the date format? If so, this is OK, as we'll get that information in the validation process. But if not, maybe we need to keep the "None" so we don't end up with a validated date field if it is not a date?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not check date format, but the functionality of this "continue" is to not include this field in the final json. I don't think it makes much sense to include it as "None" instead of just removing it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, so then if it is required, it won't validate. In any case, make sure this information is not lost and is kept in the json-log, so that when we ask the hospital for modifications the problem is clear.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, to save the exact field in the json logs I'd need a small change. I'll update it.

stderr.print(f"[red]{log_text} for sample {sample_id}")
continue
elif "sample id" in key.lower():
if isinstance(row[key], float) or isinstance(row[key], int):
row[key] = str(int(row[key]))
Expand All @@ -321,8 +322,8 @@ def read_metadata_file(self):
log_text = f"Error when mapping the label {str(e)}"
self.logsum.add_error(sample_id, log_text)
stderr.print(f"[red]{log_text}")

continue

valid_metadata_rows.append(property_row)
return valid_metadata_rows

Expand Down Expand Up @@ -353,7 +354,7 @@ def create_metadata_json(self):
)
stderr.print("[blue]Writting output json file")
os.makedirs(self.output_folder, exist_ok=True)
self.logsum.create_error_summary()
self.logsum.create_error_summary(called_module="read-lab-metadata")
file_path = os.path.join(self.output_folder, file_name)
relecov_tools.utils.write_json_fo_file(completed_metadata, file_path)
return True
Loading