diff --git a/bin/mashtree.pl b/bin/mashtree.pl index 0def63e..7b345e6 100755 --- a/bin/mashtree.pl +++ b/bin/mashtree.pl @@ -18,7 +18,7 @@ use FindBin; use lib "$FindBin::RealBin/../lib"; -use Mashtree qw/logmsg @fastqExt @fastaExt @richseqExt _truncateFilename createTreeFromPhylip $MASHTREE_VERSION/; +use Mashtree qw/logmsg @fastqExt @fastaExt @mshExt @richseqExt _truncateFilename createTreeFromPhylip $MASHTREE_VERSION/; use Mashtree::Db; use Bio::Tree::DistanceFactory; use Bio::Matrix::IO; @@ -156,7 +156,7 @@ sub mashSketch{ my @msh; # $fastq is a misnomer: it could be any kind of accepted sequence file for my $fastq(@$genomeArr){ - my($fileName,$filePath,$fileExt)=fileparse($fastq,@fastqExt,@fastaExt,@richseqExt); + my($fileName,$filePath,$fileExt)=fileparse($fastq,@fastqExt,@fastaExt,@richseqExt,@mshExt); # Unzip the file. This temporary file will # only exist if the correct extensions are detected. @@ -179,7 +179,7 @@ sub mashSketch{ if($was_unzipped){ $fastq=$unzipped; - ($fileName,$filePath,$fileExt)=fileparse($fastq,@fastqExt,@fastaExt,@richseqExt); + ($fileName,$filePath,$fileExt)=fileparse($fastq,@fastqExt,@fastaExt,@richseqExt,@mshExt); } # If we see a richseq (e.g., gbk or embl), then convert it to fasta @@ -213,17 +213,23 @@ sub mashSketch{ $sketchXopts.="-m $minDepth -g $$settings{genomesize} "; } elsif(grep {$_ eq $fileExt} @fastaExt) { $sketchXopts.=" "; + } elsif(grep {$_ eq $fileExt} @mshExt){ + $sketchXopts.=" "; } else { logmsg "WARNING: I could not understand what kind of file this is by its extension ($fileExt): $fastq"; } - my $outPrefix="$sketchDir/".basename($fastq); + my $outPrefix="$sketchDir/".basename($fastq, @mshExt); # See if the user already mashed this file locally if(-e "$fastq.msh"){ logmsg "Found locally mashed file $fastq.msh. I will use it."; copy("$fastq.msh","$outPrefix.msh"); } + if(grep {$_ eq $fileExt} @mshExt){ + logmsg "Input file is a sketch file itself and will be used as such: $fastq"; + copy($fastq, "$outPrefix.msh"); + } if(-e "$outPrefix.msh"){ logmsg "WARNING: ".basename($fastq)." was already mashed."; diff --git a/lib/Mashtree.pm b/lib/Mashtree.pm index 4f75bdb..18f36d6 100644 --- a/lib/Mashtree.pm +++ b/lib/Mashtree.pm @@ -15,7 +15,7 @@ use Bio::Matrix::IO; our @EXPORT_OK = qw( logmsg openFastq _truncateFilename distancesToPhylip createTreeFromPhylip sortNames - @fastqExt @fastaExt @bamExt @vcfExt @richseqExt + @fastqExt @fastaExt @bamExt @vcfExt @richseqExt @mshExt $MASHTREE_VERSION ); @@ -24,12 +24,13 @@ local $0=basename $0; ###### # CONSTANTS -our $VERSION = "0.20"; +our $VERSION = "0.21"; our $MASHTREE_VERSION=$VERSION; our @fastqExt=qw(.fastq.gz .fastq .fq .fq.gz); -our @fastaExt=qw(.fasta .fna .faa .mfa .fas .fa); +our @fastaExt=qw(.fasta .fna .faa .mfa .fas .fsa .fa); our @bamExt=qw(.sorted.bam .bam); our @vcfExt=qw(.vcf.gz .vcf); +our @mshExt=qw(.msh); # Richseq extensions were obtained mostly from bioperl under # the genbank, embl, and swissprot entries, under # the source for Bio::SeqIO