Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add try/except around extraction to accommodate failing uncompression in pyani download #385

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
34 changes: 29 additions & 5 deletions pyani/scripts/subcommands/subcmd_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,23 @@
"""Provides the download subcommand for pyani."""

import logging
import subprocess

from argparse import Namespace
from typing import Dict, List, NamedTuple, Optional, Tuple

from Bio import SeqIO

from pyani import download
from pyani import download, PyaniException
from pyani.pyani_tools import termcolor
from pyani.scripts import make_outdir


class PyaniDownloadException(PyaniException):

    """Raised when a genome download or archive extraction cannot complete."""

class Skipped(NamedTuple):

"""Convenience struct for holding information about skipped genomes."""
Expand Down Expand Up @@ -98,7 +104,9 @@ def dl_info_to_str(esummary, uid_class) -> str:


def download_data(
args: Namespace, api_key: Optional[str], asm_dict: Dict[str, List],
args: Namespace,
api_key: Optional[str],
asm_dict: Dict[str, List],
) -> Tuple[List, List, List]:
"""Download the accessions indicated in the passed dictionary.

Expand Down Expand Up @@ -131,7 +139,14 @@ def download_data(
exc_info=True,
)
skippedlist.append(
Skipped(tid, uid, "", "", None, "RefSeq",)
Skipped(
tid,
uid,
"",
"",
None,
"RefSeq",
)
) # pylint: disable=no-member
continue

Expand All @@ -154,7 +169,13 @@ def download_data(
)
skippedlist.extend(skipped_genomes)
if not dlstatus.skipped:
extract_genomes(args, dlstatus, esummary)
try:
extract_genomes(args, dlstatus, esummary)
except PyaniDownloadException:
logger.warning(
"Could not extract %s; continuing", dlstatus.outfname
)
continue
labeltxt, classtxt = hash_genomes(args, dlstatus, filestem, uid_class)
classes.append(classtxt)
labels.append(labeltxt)
Expand Down Expand Up @@ -182,7 +203,10 @@ def extract_genomes(args: Namespace, dlstatus: download.DLStatus, esummary) -> N
logger.warning("Output file %s exists, not extracting", ename)
else:
logger.debug("Extracting archive %s to %s", dlstatus.outfname, ename)
download.extract_contigs(dlstatus.outfname, ename)
try:
download.extract_contigs(dlstatus.outfname, ename)
except subprocess.CalledProcessError:
raise PyaniDownloadException

# Modify sequence ID header if Kraken option active
if args.kraken:
Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ def dir_anim_in():
return FIXTUREPATH / "anim"


@pytest.fixture
def dir_download_out():
    """Directory used as output location by download tests."""
    return FIXTUREPATH.joinpath("download")


@pytest.fixture
def dir_fastani_in():
"""Input files for fastANI tests."""
Expand Down
Empty file.
7 changes: 7 additions & 0 deletions tests/test_subcmd_01_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"""

import logging
import subprocess

from argparse import Namespace
from pathlib import Path
Expand Down Expand Up @@ -123,6 +124,12 @@ def test_create_hash():
download.create_hash(test_file)


def test_failed_extract_contigs(dir_download_out):
    """Test for failed extraction of zip file contents."""
    # A nonexistent archive path should cause download.extract_contigs to
    # raise subprocess.CalledProcessError (presumably from a failed
    # decompression subprocess -- confirm against pyani.download); this is
    # the exception the subcommand wraps into PyaniDownloadException.
    with assertions.assertRaises(subprocess.CalledProcessError):
        download.extract_contigs("bad/file.gz", dir_download_out / "bad_location.txt")


def test_download_dry_run(dryrun_namespace):
"""Dry run of C. blochmannia download."""
subcommands.subcmd_download(dryrun_namespace)
Expand Down