From 99ec32d05671cf3196ca8df10cc9e4e0bf8f4eb7 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Mon, 14 Mar 2022 13:56:14 -0900 Subject: [PATCH 1/5] Add `try/except` around extraction to prevent `pyani` falling over --- pyani/scripts/subcommands/subcmd_download.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pyani/scripts/subcommands/subcmd_download.py b/pyani/scripts/subcommands/subcmd_download.py index ac991bed..b1f6ca80 100644 --- a/pyani/scripts/subcommands/subcmd_download.py +++ b/pyani/scripts/subcommands/subcmd_download.py @@ -40,6 +40,7 @@ """Provides the download subcommand for pyani.""" import logging +import subprocess from argparse import Namespace from typing import Dict, List, NamedTuple, Optional, Tuple @@ -98,7 +99,9 @@ def dl_info_to_str(esummary, uid_class) -> str: def download_data( - args: Namespace, api_key: Optional[str], asm_dict: Dict[str, List], + args: Namespace, + api_key: Optional[str], + asm_dict: Dict[str, List], ) -> Tuple[List, List, List]: """Download the accessions indicated in the passed dictionary. @@ -131,7 +134,14 @@ def download_data( exc_info=True, ) skippedlist.append( - Skipped(tid, uid, "", "", None, "RefSeq",) + Skipped( + tid, + uid, + "", + "", + None, + "RefSeq", + ) ) # pylint: disable=no-member continue @@ -182,7 +192,11 @@ def extract_genomes(args: Namespace, dlstatus: download.DLStatus, esummary) -> N logger.warning("Output file %s exists, not extracting", ename) else: logger.debug("Extracting archive %s to %s", dlstatus.outfname, ename) - download.extract_contigs(dlstatus.outfname, ename) + try: + download.extract_contigs(dlstatus.outfname, ename) + except subprocess.CalledProcessError: + logger.warning("Could not extract %s; continuing", dlstatus.outfname) + pass # Modify sequence ID header if Kraken option active if args.kraken: From 78df70bd2263a8f36dfb329d5515e8a2ad692683 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Tue, 29 Mar 2022 12:47:46 +0100 Subject: [PATCH 2/5] Add test for failed extraction exception handling --- tests/test_subcmd_01_download.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_subcmd_01_download.py b/tests/test_subcmd_01_download.py index 095494ad..dc7ec3c2 100644 --- a/tests/test_subcmd_01_download.py +++ b/tests/test_subcmd_01_download.py @@ -58,6 +58,7 @@ """ import logging +import subprocess from argparse import Namespace from pathlib import Path @@ -123,6 +124,19 @@ def test_create_hash(): download.create_hash(test_file) +@pytest.fixture +def mock_failed_extraction(monkeypatch): + def mock_extraction(*args, **kwargs): + raise subprocess.CalledProcessError + + monkeypatch.setattr(download, "extract_contigs", mock_extraction) + + +def test_failed_extract_genomes(base_download_namespace, mock_failed_extraction): + with assertions.assertRaises(subprocess.CalledProcessError): + subcommands.subcmd_download(base_download_namespace) + + def test_download_dry_run(dryrun_namespace): """Dry run of C. blochmannia download.""" subcommands.subcmd_download(dryrun_namespace) From 98f71acaeb81889f524b0007a43d1412489cdc85 Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Wed, 30 Mar 2022 12:00:51 +0100 Subject: [PATCH 3/5] Create and use new `PyaniDownloadException` to warn of extraction failures --- pyani/scripts/subcommands/subcmd_download.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pyani/scripts/subcommands/subcmd_download.py b/pyani/scripts/subcommands/subcmd_download.py index b1f6ca80..98185151 100644 --- a/pyani/scripts/subcommands/subcmd_download.py +++ b/pyani/scripts/subcommands/subcmd_download.py @@ -47,11 +47,16 @@ from Bio import SeqIO -from pyani import download +from pyani import download, PyaniException from pyani.pyani_tools import termcolor from pyani.scripts import make_outdir +class PyaniDownloadException(PyaniException): + + """Exception raised when a download or archive extraction fails.""" + + class Skipped(NamedTuple): """Convenience struct for holding information about skipped genomes.""" @@ -164,7 +169,13 @@ def download_data( ) skippedlist.extend(skipped_genomes) if not dlstatus.skipped: - extract_genomes(args, dlstatus, esummary) + try: + extract_genomes(args, dlstatus, esummary) + except PyaniDownloadException: + logger.warning( + "Could not extract %s; continuing", dlstatus.outfname + ) + continue labeltxt, classtxt = hash_genomes(args, dlstatus, filestem, uid_class) classes.append(classtxt) labels.append(labeltxt) @@ -195,8 +206,7 @@ def extract_genomes(args: Namespace, dlstatus: download.DLStatus, esummary) -> N try: download.extract_contigs(dlstatus.outfname, ename) except subprocess.CalledProcessError: - logger.warning("Could not extract %s; continuing", dlstatus.outfname) - pass + raise PyaniDownloadException # Modify sequence ID header if Kraken option active if args.kraken: From 65f90b3b32c4918a7fc011fdb11749c12aef218e Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Wed, 30 Mar 2022 12:02:59 +0100 Subject: [PATCH 4/5] Rewrite and simplify test for failed archive extraction Uses a different entry point into the code, which makes the test much neater. --- tests/conftest.py | 6 ++++++ tests/test_subcmd_01_download.py | 13 +++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 10e1ca21..de3f3109 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -130,6 +130,12 @@ def dir_anim_in(): return FIXTUREPATH / "anim" +@pytest.fixture +def dir_download_out(): + """Output directory for download tests.""" + return TESTSPATH / "test_output/subcmd_download" + + @pytest.fixture def dir_fastani_in(): """Input files for fastANI tests.""" diff --git a/tests/test_subcmd_01_download.py b/tests/test_subcmd_01_download.py index dc7ec3c2..d141da9e 100644 --- a/tests/test_subcmd_01_download.py +++ b/tests/test_subcmd_01_download.py @@ -124,17 +124,10 @@ def test_create_hash(): download.create_hash(test_file) -@pytest.fixture -def mock_failed_extraction(monkeypatch): - def mock_extraction(*args, **kwargs): - raise subprocess.CalledProcessError - - monkeypatch.setattr(download, "extract_contigs", mock_extraction) - - -def test_failed_extract_genomes(base_download_namespace, mock_failed_extraction): +def test_failed_extract_contigs(dir_download_out): + """Test for failed extraction of zip file contents.""" with assertions.assertRaises(subprocess.CalledProcessError): - subcommands.subcmd_download(base_download_namespace) + download.extract_contigs("bad/file.gz", dir_download_out / "bad_location.txt") def test_download_dry_run(dryrun_namespace): From 76bf22caee1e6121ab5ec6404b8c846a13e3915b Mon Sep 17 00:00:00 2001 From: baileythegreen Date: Wed, 30 Mar 2022 12:33:41 +0100 Subject: [PATCH 5/5] Move output file location to `fixtures`, which is tracked --- tests/conftest.py | 2 +- tests/fixtures/download/bad_location.txt | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 tests/fixtures/download/bad_location.txt diff --git a/tests/conftest.py b/tests/conftest.py index de3f3109..c207b345 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -133,7 +133,7 @@ def dir_anim_in(): @pytest.fixture def dir_download_out(): """Output directory for download tests.""" - return TESTSPATH / "test_output/subcmd_download" + return FIXTUREPATH / "download" @pytest.fixture diff --git a/tests/fixtures/download/bad_location.txt b/tests/fixtures/download/bad_location.txt new file mode 100644 index 00000000..e69de29b