Skip to content

Commit

Permalink
more review changes
Browse files: browse the repository at this point in the history
  • Loading branch information
chrisAta committed Dec 11, 2024
1 parent 5897845 commit ac3348b
Showing 1 changed file with 33 additions and 25 deletions.
58 changes: 33 additions & 25 deletions mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -86,38 +86,46 @@ def get_tax_file(
:rtype: Union[Path, List[Path]]
"""

tax_file = ""
tax_file = None

db_path = Path(f"{analyses_dir}/{run_acc}/taxonomy-summary/{db_label}")

if db_path.exists():
if db_label in TAXDB_LABELS:
tax_file = Path(
f"{analyses_dir}/{run_acc}/taxonomy-summary/{db_label}/{run_acc}_{db_label}.txt"
if not db_path.exists():
logging.debug(
f"DB {db_path} doesn't exist for {run_acc}. Skipping"
) # or error?
return

if db_label in TAXDB_LABELS:
tax_file = Path(
f"{analyses_dir}/{run_acc}/taxonomy-summary/{db_label}/{run_acc}_{db_label}.txt"
)
if not tax_file.exists():
logging.error(
f"DB path exists but file doesn't - exiting. Path: {tax_file}"
)
if not tax_file.exists():
logging.error(
f"DB path exists but file doesn't - exiting. Path: {tax_file}"
)
exit(1)
exit(1)

file_size = tax_file.stat().st_size
if (
file_size == 0
): # Pipeline can generate files that are empty for ITS DBs (UNITE and ITSoneDB),
# so need to skip those. Should probably fix that at some point
tax_file = ""
elif db_label in ASV_TAXDB_LABELS:
# ASV tax files could have up to two files, one for each amplified region (maximum two from the pipeline).
# So will need to handle this differently to closed-reference files
asv_tax_files = glob.glob(
f"{analyses_dir}/{run_acc}/taxonomy-summary/{db_label}/*.txt"
file_size = tax_file.stat().st_size
if (
file_size == 0
): # Pipeline can generate files that are empty for ITS DBs (UNITE and ITSoneDB),
# so need to skip those. Should probably fix that at some point
logging.debug(
f"File {tax_file} exists but is empty, so will be ignoring it."
)
asv_tax_files = [
Path(file) for file in asv_tax_files if "concat" not in file
] # Have to filter out concatenated file if it exists
tax_file = None
elif db_label in ASV_TAXDB_LABELS:
# ASV tax files could have up to two files, one for each amplified region (maximum two from the pipeline).
# So will need to handle this differently to closed-reference files
asv_tax_files = glob.glob(
f"{analyses_dir}/{run_acc}/taxonomy-summary/{db_label}/*.txt"
)
asv_tax_files = [
Path(file) for file in asv_tax_files if "concat" not in file
] # Have to filter out concatenated file if it exists

tax_file = asv_tax_files
tax_file = asv_tax_files

return tax_file

Expand Down

0 comments on commit ac3348b

Please sign in to comment.