Skip to content

Commit

Permalink
fix naming error in paprica-place_it
Browse files Browse the repository at this point in the history
When running archaea and bacteria on a single sample, the script was attempting to
merge the archaeal jplace with the bacterial one.  It now searches for jplace files
with the specified domain in the name.
  • Loading branch information
bowmanjeffs committed Dec 6, 2017
1 parent 0a72e33 commit 506ba09
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
17 changes: 14 additions & 3 deletions paprica-mgt_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,24 @@ def download_assembly(ref_dir_domain, executable, assembly_accession):
try:
strain_ftp = genome_data_virus.loc[assembly_accession, 'ftp_path']

## Required to use the proxy server on the SIO network. Bafflingly, there is a bug in wget that
## disallows the use of a wildcard with ftp when a proxy server is used. So the ftp
## path must be converted to http. Note that this requires the use of the -nd flag
## because the http protocol will try to create the entire directory structure (grr!)

base_name = strain_ftp.split('/')[-1]

mkdir = subprocess.Popen('mkdir ' + ref_dir_domain + 'refseq/' + assembly_accession, shell = True, executable = executable)
mkdir.communicate()

wget0 = subprocess.Popen('cd ' + ref_dir_domain + 'refseq/' + assembly_accession + ';wget --tries=10 -T30 -q -A "genomic.fna.gz","genomic.gbff.gz","protein.faa.gz" ' + strain_ftp + '/*', shell = True, executable = executable)
wget0.communicate()
for extension in ['_genomic.fna.gz', '_genomic.gbff.gz', '_protein.faa.gz']:
wget0 = subprocess.Popen('cd ' + ref_dir_domain + 'refseq/' + assembly_accession + ';wget \
--tries=10 -q -r -nd -T30 -e robots=off ' \
+ strain_ftp + '/' + base_name + extension, \
shell = True, executable = executable)
wget0.communicate()

gunzip = subprocess.Popen('gunzip ' + ref_dir_domain + 'refseq/' + assembly_accession + '/*gz', shell = True, executable = executable)
gunzip = subprocess.Popen('gunzip ' + ref_dir_domain + 'refseq/' + assembly_accession + '/*', shell = True, executable = executable)
gunzip.communicate()

print assembly_accession + ':' + strain_ftp
Expand Down
2 changes: 1 addition & 1 deletion paprica-place_it.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def count_unique():
Parallel(n_jobs = splits, verbose = 5)(delayed(place)
(split_query, ref, ref_dir_domain, cm) for split_query in split_list)

guppy_merge = subprocess.Popen('guppy merge ' + cwd + query + '*' + '.jplace -o ' + cwd + query + '.' + ref + '.clean.align.jplace', shell = True, executable = executable)
guppy_merge = subprocess.Popen('guppy merge ' + cwd + query + '*' + domain + '*' + '.jplace -o ' + cwd + query + '.' + ref + '.clean.align.jplace', shell = True, executable = executable)
guppy_merge.communicate()
guppy(cwd + query, ref)

Expand Down

0 comments on commit 506ba09

Please sign in to comment.