From ff265c228d5b749c36de18267423f66a329046a0 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 20 May 2015 15:44:24 +0100 Subject: [PATCH 1/9] rework qc --- lib/Bio/Roary/JobRunner/Local.pm | 18 ++ lib/Bio/Roary/JobRunner/Parallel.pm | 21 +++ lib/Bio/Roary/JobRunner/Role.pm | 8 +- lib/Bio/Roary/QC/Kraken.pm | 106 ----------- lib/Bio/Roary/QC/Report.pm | 198 +++++++++++++++++--- lib/Bio/Roary/QC/ShredAssemblies.pm | 89 --------- t/Bio/Roary/QC/Report.t | 100 ++++++++-- t/data/expected_query_1.fna | 252 ++++++++++++++++++++++++++ t/data/expected_query_2.fna | 252 ++++++++++++++++++++++++++ t/data/kraken_test/database.idx | Bin 0 -> 8208 bytes t/data/kraken_test/database.jdb | Bin 0 -> 2872 bytes t/data/kraken_test/database.kdb | Bin 0 -> 2872 bytes t/data/kraken_test/taxonomy/names.dmp | 77 ++++++++ t/data/kraken_test/taxonomy/nodes.dmp | 12 ++ 14 files changed, 897 insertions(+), 236 deletions(-) delete mode 100644 lib/Bio/Roary/QC/Kraken.pm delete mode 100644 lib/Bio/Roary/QC/ShredAssemblies.pm mode change 100644 => 100755 t/Bio/Roary/QC/Report.t create mode 100644 t/data/expected_query_1.fna create mode 100644 t/data/expected_query_2.fna create mode 100644 t/data/kraken_test/database.idx create mode 100644 t/data/kraken_test/database.jdb create mode 100644 t/data/kraken_test/database.kdb create mode 100644 t/data/kraken_test/taxonomy/names.dmp create mode 100644 t/data/kraken_test/taxonomy/nodes.dmp diff --git a/lib/Bio/Roary/JobRunner/Local.pm b/lib/Bio/Roary/JobRunner/Local.pm index 04e5d85..e6ce3b2 100644 --- a/lib/Bio/Roary/JobRunner/Local.pm +++ b/lib/Bio/Roary/JobRunner/Local.pm @@ -15,13 +15,17 @@ package Bio::Roary::JobRunner::Local; =cut use Moose; +use Log::Log4perl qw(:easy); has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); +has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger'); +has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); sub run { my ($self) = @_; for my $command_to_run ( @{ 
$self->commands_to_run } ) { + $self->logger->info($command_to_run); system($command_to_run ); } 1; @@ -36,9 +40,23 @@ sub _construct_dependancy_params sub submit_dependancy_job { my ( $self,$command_to_run) = @_; + $self->logger->info($command_to_run); system($command_to_run ); } +sub _build_logger +{ + my ($self) = @_; + my $level = $ERROR; + if($self->verbose) + { + $level = $DEBUG; + } + Log::Log4perl->easy_init($level); + my $logger = get_logger(); + return $logger; +} + no Moose; __PACKAGE__->meta->make_immutable; diff --git a/lib/Bio/Roary/JobRunner/Parallel.pm b/lib/Bio/Roary/JobRunner/Parallel.pm index 5c26be1..333de3a 100644 --- a/lib/Bio/Roary/JobRunner/Parallel.pm +++ b/lib/Bio/Roary/JobRunner/Parallel.pm @@ -18,13 +18,20 @@ package Bio::Roary::JobRunner::Parallel; use Moose; use File::Slurp::Tiny qw(read_file write_file); use File::Temp qw/ tempfile /; +use Log::Log4perl qw(:easy); has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 ); +has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger'); +has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); sub run { my ($self) = @_; + for my $command_to_run(@{ $self->commands_to_run }) + { + $self->logger->info($command_to_run); + } open(my $fh,"|-","parallel -j ".$self->cpus) || die "GNU Parallel failed"; print $fh join("\n", @{ $self->commands_to_run }); close $fh; @@ -39,9 +46,23 @@ sub _construct_dependancy_params sub submit_dependancy_job { my ( $self,$command_to_run) = @_; + $self->logger->info($command_to_run); system($command_to_run ); } +sub _build_logger +{ + my ($self) = @_; + my $level = $ERROR; + if($self->verbose) + { + $level = $DEBUG; + } + Log::Log4perl->easy_init($level); + my $logger = get_logger(); + return $logger; +} + no Moose; __PACKAGE__->meta->make_immutable; diff --git a/lib/Bio/Roary/JobRunner/Role.pm b/lib/Bio/Roary/JobRunner/Role.pm index 9af7214..fd6cda4 100644 --- 
a/lib/Bio/Roary/JobRunner/Role.pm +++ b/lib/Bio/Roary/JobRunner/Role.pm @@ -19,11 +19,17 @@ has '_queue' => ( is => 'rw', isa => 'Str', default => 'normal has 'dont_wait' => ( is => 'rw', isa => 'Bool', default => 0 ); has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 ); has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger'); +has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); sub _build_logger { my ($self) = @_; - Log::Log4perl->easy_init($ERROR); + my $level = $ERROR; + if($self->verbose) + { + $level = $DEBUG; + } + Log::Log4perl->easy_init($level); my $logger = get_logger(); return $logger; } diff --git a/lib/Bio/Roary/QC/Kraken.pm b/lib/Bio/Roary/QC/Kraken.pm deleted file mode 100644 index d9ea68b..0000000 --- a/lib/Bio/Roary/QC/Kraken.pm +++ /dev/null @@ -1,106 +0,0 @@ -package Bio::Roary::QC::Kraken; - -# ABSTRACT: run kraken on list of inputs and parse output - -=head1 SYNOPSIS - -=cut - -use Moose; -use File::Basename; -with 'Bio::Roary::JobRunner::Role'; - -has 'assembly_directory' => ( is => 'ro', isa => 'Str', required => 1 ); -has 'glob_search' => ( is => 'ro', isa => 'Str', default => '*.shred.fa' ); -has 'kraken_exec' => ( is => 'ro', isa => 'Str', default => 'kraken' ); -has 'kraken_report_exec' => ( is => 'ro', isa => 'Str', default => 'kraken-report' ); -has 'kraken_db' => ( is => 'ro', isa => 'Str', default => '/lustre/scratch108/pathogen/pathpipe/kraken/minikraken_20140330/' ); -has 'top_hits' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 ); -has 'kraken_memory' => ( is => 'rw', isa => 'Int', default => 2000 ); - -sub _build_top_hits { - my $self = shift; - - my @top_hits; - my $file_search = join( '/', ($self->assembly_directory, $self->glob_search) ); - foreach my $shred_ass ( glob $file_search ){ - push( @top_hits, $self->_top_kraken_hit( $shred_ass ) ); - } - - return \@top_hits; -} - -sub _top_kraken_hit { - my ( $self, $assembly ) = @_; - - my $kraken_output = $assembly; - $kraken_output =~ 
s/fa$/kraken/; - my $kraken_report = "$kraken_output.report"; - - my $kraken_runner_obj = $self->_job_runner_class->new( - commands_to_run => [ $self->_kraken_cmd( $assembly, $kraken_output ) ], - memory_in_mb => $self->kraken_memory, - queue => $self->_queue - ); - $kraken_runner_obj->run(); - - my $kraken_report_runner_obj = $self->_job_runner_class->new( - commands_to_run => [ $self->_kraken_report_cmd( $kraken_output, $kraken_report ) ], - memory_in_mb => $self->kraken_memory, - queue => $self->_queue - ); - $kraken_report_runner_obj->run(); - - # parse report - my ( $top_genus, $top_species ) = @{ $self->_parse_kraken_report($kraken_report) }; - - my $assembly_id = basename( $assembly, '.shred.fa' ); - - return [ $assembly_id, $top_genus, $top_species ]; -} - -sub _parse_kraken_report { - my ( $self, $kraken_report ) = @_; - - # parse report - open( REPORT, '<', $kraken_report ); - my ( $top_genus, $top_species ); - while ( ){ - my @parts = split( "\t" ); - chomp @parts; - - $top_genus = $parts[5] if ( (! defined $top_genus) && $parts[3] eq 'G' ); - $top_species = $parts[5] if ( (! defined $top_species) && $parts[3] eq 'S' ); - - last if (defined $top_genus && defined $top_species); - } - - $top_genus ||= "not_found"; - $top_genus =~ s/^\s+//g; - $top_species ||= "not_found"; - $top_species =~ s/^\s+//g; - - return [ $top_genus, $top_species ]; -} - -sub _kraken_cmd { - my ( $self, $a, $kraken_output ) = @_; - - my $kcmd = $self->kraken_exec . - " --db " . $self->kraken_db . - " --output $kraken_output $a"; - return $kcmd; -} - -sub _kraken_report_cmd { - my ( $self, $k, $report_output ) = @_; - - my $krcmd = $self->kraken_report_exec . - " --db " . $self->kraken_db . 
- " $k > $report_output"; - return $krcmd; -} - -__PACKAGE__->meta->make_immutable; -no Moose; -1; \ No newline at end of file diff --git a/lib/Bio/Roary/QC/Report.pm b/lib/Bio/Roary/QC/Report.pm index 4173f98..fc7459e 100644 --- a/lib/Bio/Roary/QC/Report.pm +++ b/lib/Bio/Roary/QC/Report.pm @@ -10,39 +10,189 @@ use Moose; use File::Temp; use File::Path 'rmtree'; use Cwd; -use Bio::Roary::QC::ShredAssemblies; -use Bio::Roary::QC::Kraken; - -has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); -has 'kraken_exec' => ( is => 'ro', isa => 'Str', default => 'kraken' ); -has 'kraken_db' => ( is => 'ro', isa => 'Str', default => '' ); -has 'outfile' => ( is => 'rw', isa => 'Str', default => 'qc_report.csv' ); -has '_kraken_data' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 ); -has '_header' => ( is => 'rw', isa => 'Str', lazy_build => 1 ); -has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'Local' ); -has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); -has 'cpus' => ( is => 'rw', isa => 'Int', default => 1 ); +use File::Basename; +with 'Bio::Roary::JobRunner::Role'; + +has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); +has 'kraken_exec' => ( is => 'ro', isa => 'Str', default => 'kraken' ); +has 'kraken_report_exec' => ( is => 'ro', isa => 'Str', default => 'kraken-report' ); +has 'kraken_db' => ( is => 'ro', isa => 'Str', default => '/lustre/scratch108/pathogen/pathpipe/kraken/minikraken_20140330/' ); +has 'outfile' => ( is => 'rw', isa => 'Str', default => 'qc_report.csv' ); +has '_kraken_data' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 ); +has '_header' => ( is => 'rw', isa => 'Str', lazy_build => 1 ); +has 'kraken_memory' => ( is => 'rw', isa => 'Int', default => 2000 ); has '_tmp_directory_obj' => ( is => 'rw', lazy_build => 1 ); -has '_tmp_directory' => ( is => 'rw', lazy_build => 1, isa => 'Str', ); +has '_tmp_directory' => ( is => 'rw', lazy_build => 1, isa => 'Str', ); -sub 
_build__kraken_data { - my $self = shift; - my $shredder = Bio::Roary::QC::ShredAssemblies->new( - gff_files => $self->input_files, - output_directory => $self->_tmp_directory, - job_runner => $self->job_runner +sub _nuc_fasta_filename +{ + my ($self, $gff) = @_; + + my $prefix = basename( $gff, ".gff" ); + my $outfile = $self->_tmp_directory . "/$prefix.fna"; + return $outfile; +} + +sub _extract_nuc_fasta_cmd { + my ($self, $gff) = @_; + my $outfile = $self->_nuc_fasta_filename($gff); + my $cmd = "sed -n '/##FASTA/,//p' $gff | grep -v \'##FASTA\' > $outfile"; + + return $cmd; +} + +sub _extract_nuc_files_from_all_gffs +{ + my ($self) = @_; + my @nuc_files; + my @commands_to_run; + for my $input_file(@{$self->input_files}) + { + push(@nuc_files,$self->_nuc_fasta_filename($input_file)); + push(@commands_to_run,$self->_extract_nuc_fasta_cmd($input_file)); + } + my $kraken_runner_obj = $self->_job_runner_class->new( + commands_to_run => \@commands_to_run, + memory_in_mb => $self->kraken_memory, + verbose => $self->verbose, + cpus => $self->cpus + ); + $kraken_runner_obj->run(); + return \@nuc_files; +} + +sub _kraken_cmd { + my ( $self, $a, $kraken_output ) = @_; + + my $kcmd = $self->kraken_exec . + " --fasta-input ". + " --db " . $self->kraken_db . + " --output $kraken_output $a"; + return $kcmd; +} + +sub _kraken_report_cmd { + my ( $self, $k, $report_output ) = @_; + + my $krcmd = $self->kraken_report_exec . + " --db " . $self->kraken_db . 
+ " $k > $report_output"; + return $krcmd; +} + +sub _kraken_output_filename +{ + my ( $self, $assembly ) = @_; + my $kraken_output = $assembly; + $kraken_output =~ s/fna$/kraken/; + return $kraken_output; +} + +sub _run_kraken_on_nuc_files +{ + my ( $self, $nuc_files ) = @_; + my @kraken_output_files; + my @commands_to_run; + for my $nuc_file(@{$nuc_files}) + { + my $kraken_output = $self->_kraken_output_filename($nuc_file); + push(@kraken_output_files, $kraken_output ); + push(@commands_to_run, $self->_kraken_cmd( $nuc_file, $kraken_output )); + } + + my $kraken_runner_obj = $self->_job_runner_class->new( + commands_to_run => \@commands_to_run, + memory_in_mb => $self->kraken_memory, + verbose => $self->verbose, + cpus => $self->cpus ); - $shredder->shred or die ( "Failed to shred assembly data\n" ); + $kraken_runner_obj->run(); + return \@kraken_output_files; +} + +sub _kraken_report_output_filename +{ + my ( $self, $assembly ) = @_; + return $assembly.".report"; +} - my $kraken = Bio::Roary::QC::Kraken->new( - assembly_directory => $self->_tmp_directory, - job_runner => $self->job_runner +sub _run_kraken_report_on_kraken_files +{ + my ( $self, $kraken_files ) = @_; + + my @kraken_report_output_files; + my @commands_to_run; + for my $nuc_file(@{$kraken_files}) + { + my $kraken_output = $self->_kraken_report_output_filename($nuc_file); + push(@kraken_report_output_files, $kraken_output ); + push(@commands_to_run, $self->_kraken_report_cmd( $nuc_file, $kraken_output )); + } + + my $kraken_runner_obj = $self->_job_runner_class->new( + commands_to_run => \@commands_to_run, + memory_in_mb => $self->kraken_memory, + verbose => $self->verbose, + cpus => $self->cpus ); - return $kraken->top_hits; + $kraken_runner_obj->run(); + return \@kraken_report_output_files; +} + +sub _build__kraken_data { + my $self = shift; + my $nuc_files = $self->_extract_nuc_files_from_all_gffs(); + my $kraken_files = $self->_run_kraken_on_nuc_files($nuc_files); + my $kraken_report_files = 
$self->_run_kraken_report_on_kraken_files( $kraken_files ); + + return $self->_parse_kraken_reports($kraken_report_files); } +sub _parse_kraken_reports +{ + my ( $self, $kraken_report_files ) = @_; + + my @report_rows; + for my $kraken_report(@{$kraken_report_files}) + { + push(@report_rows, $self->_parse_kraken_report($kraken_report)); + } + return \@report_rows; +} + +sub _parse_kraken_report { + my ( $self, $kraken_report ) = @_; + + # parse report + open( REPORT, '<', $kraken_report ); + + my $sample_name = $kraken_report; + $sample_name =~ s/.report$//; + $sample_name =~ s/.kraken$//; + my($sample_base_name, $dirs, $suffix) = fileparse($sample_name); + + my ( $top_genus, $top_species ); + while ( ){ + my @parts = split( "\t" ); + chomp @parts; + + $top_genus = $parts[5] if ( (! defined $top_genus) && $parts[3] eq 'G' ); + $top_species = $parts[5] if ( (! defined $top_species) && $parts[3] eq 'S' ); + + last if (defined $top_genus && defined $top_species); + } + + $top_genus ||= "not_found"; + $top_genus =~ s/^\s+//g; + $top_species ||= "not_found"; + $top_species =~ s/^\s+//g; + + return [ $sample_base_name, $top_genus, $top_species ]; +} + + sub _build__header { return join( ',', ( 'Sample', 'Genus', 'Species' ) ); } diff --git a/lib/Bio/Roary/QC/ShredAssemblies.pm b/lib/Bio/Roary/QC/ShredAssemblies.pm deleted file mode 100644 index 0226a12..0000000 --- a/lib/Bio/Roary/QC/ShredAssemblies.pm +++ /dev/null @@ -1,89 +0,0 @@ -package Bio::Roary::QC::ShredAssemblies; - -# ABSTRACT: slice .fa assemblies into "reads" for kraken input - -=head1 SYNOPSIS - -=cut - -use Moose; -use Bio::SeqIO; -use File::Basename; -use Cwd; -with 'Bio::Roary::JobRunner::Role'; - -has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); -has 'read_size' => ( is => 'rw', isa => 'Int', default => 150 ); -has 'max_reads_per_sequence' => ( is => 'rw', isa => 'Int',default => 20 ); -has 'max_sequences' => ( is => 'rw', isa => 'Int', default => 20 ); -has 'output_directory' => ( 
is => 'rw', isa => 'Str', lazy_build => 1 ); - -sub _build_output_directory { - return getcwd; -} - -sub _extract_nuc_fasta { - my ($self, $gff) = @_; - - my $prefix = basename( $gff, ".gff" ); - my $outfile = $self->output_directory . "/$prefix.fna"; - my $cmd = "sed -n '/##FASTA/,//p' $gff | grep -v \'##FASTA\' > $outfile"; - - system( $cmd ); - return $outfile; -} - -sub shred { - my $self = shift; - - foreach my $f ( @{ $self->gff_files } ){ - # read seq and shred into reads - my $fasta = $self->_extract_nuc_fasta($f); - my $seqio = Bio::SeqIO->new( -file => $fasta, -format => 'fasta' ); - - my @reads; - my $seq; - my $sequence_counter =0; - while( $seq = $seqio->next_seq() ){ - last if( $sequence_counter > $self->max_sequences) ; - push( @reads, @{ $self->_shredded_seq($seq->{primary_seq}->{seq}) } ); - $sequence_counter++; - } - - # write to file - my $prefix = basename( $f, ".gff" ); - my $outfile = $self->output_directory . "/$prefix.shred.fa"; - open( OUTFH, '>', $outfile ) or die "Couldn't write to $outfile: $!"; - my $c = 1; - foreach my $r ( @reads ){ - print OUTFH ">" . $prefix . 
"_$c\n"; - print OUTFH "$r\n"; - $c++; - } - close OUTFH; - } - 1; -} - -sub _shredded_seq { - my ( $self, $seq ) = @_; - chomp $seq; - - if(length($seq)> ($self->read_size * $self->max_reads_per_sequence)) - { - $seq = substr $seq,0,($self->read_size * $self->max_reads_per_sequence); - } - - my $size = $self->read_size; - my $unpack = "(A$size)*"; - my @reads = unpack( $unpack, $seq ); - - my $last = pop @reads; - push( @reads, $last ) unless $last eq ''; # deal with trailing empty entries - - return \@reads; -} - -__PACKAGE__->meta->make_immutable; -no Moose; -1; \ No newline at end of file diff --git a/t/Bio/Roary/QC/Report.t b/t/Bio/Roary/QC/Report.t old mode 100644 new mode 100755 index 176283a..c3c5d44 --- a/t/Bio/Roary/QC/Report.t +++ b/t/Bio/Roary/QC/Report.t @@ -3,6 +3,7 @@ use strict; use warnings; use Data::Dumper; use File::Slurp::Tiny qw(read_file write_file); +use File::Which; BEGIN { unshift( @INC, './lib' ) } @@ -11,31 +12,98 @@ BEGIN { use_ok('Bio::Roary::QC::Report'); } -my $kraken_data = [ - ['assembly1', 'Clostridium', 'Clostridium difficile'], - ['assembly2', 'Escherichia', 'Escherichia coli'], - ['assembly3', 'Streptococcus', 'Streptococcus pneumoniae'] + + +my $kraken_data = [ + [ 'assembly1', 'Clostridium', 'Clostridium difficile' ], + [ 'assembly2', 'Escherichia', 'Escherichia coli' ], + [ 'assembly3', 'Streptococcus', 'Streptococcus pneumoniae' ] ]; ok( - my $qc_report_obj = Bio::Roary::QC::Report->new( - input_files => [], - outfile => "kraken_report.csv", - _kraken_data => $kraken_data, - job_runner => "Local" - ), - 'QC report object created' + my $qc_report_obj = Bio::Roary::QC::Report->new( + input_files => [], + outfile => "kraken_report.csv", + _kraken_data => $kraken_data, + job_runner => "Local" + ), + 'QC report object created with no input gff files' ); ok( $qc_report_obj->report, 'report generated' ); ok( -e 'kraken_report.csv', 'report file exists' ); +is( read_file('kraken_report.csv'), read_file("t/data/exp_qc_report.csv"), 
'report file correct' ); + +unlink('kraken_report.csv'); + + +ok( + $qc_report_obj = Bio::Roary::QC::Report->new( + input_files => [ 't/data/query_1.gff', 't/data/query_2.gff' ], + outfile => "kraken_report.csv", + job_runner => "Local", + kraken_db => 't/data/kraken_test/', + verbose => 1, + ), + 'QC report object created with data' +); + +is( $qc_report_obj->_tmp_directory . '/abc.fna', $qc_report_obj->_nuc_fasta_filename('abc.gff'), 'filename of nuc from gff' ); +is( + 'sed -n \'/##FASTA/,//p\' abc.gff | grep -v \'##FASTA\' > ' . $qc_report_obj->_tmp_directory . '/abc.fna', + $qc_report_obj->_extract_nuc_fasta_cmd('abc.gff'), + 'extract nuc command' +); + +ok( my $nuc_files = $qc_report_obj->_extract_nuc_files_from_all_gffs(), 'extract nuc files from gffs' ); + +is_deeply( [ $qc_report_obj->_tmp_directory . '/query_1.fna', $qc_report_obj->_tmp_directory . '/query_2.fna' ], + $nuc_files, 'check extracted nuc files from gffs list' ); + is( - read_file('kraken_report.csv'), - read_file("t/data/exp_qc_report.csv"), - 'report file correct' + read_file( $qc_report_obj->_tmp_directory . '/query_1.fna' ), + read_file('t/data/expected_query_1.fna'), + 'Check FASTA file 1 extracted as expected' ); +is( + read_file( $qc_report_obj->_tmp_directory . '/query_2.fna' ), + read_file('t/data/expected_query_2.fna'), + 'Check FASTA file 2 extracted as expected' +); + +SKIP: +{ + + skip "kraken not installed", 2 unless ( which('kraken') ); + skip "kraken-report not installed", 2 unless ( which('kraken-report') ); + + ok( my $kraken_files = $qc_report_obj->_run_kraken_on_nuc_files($nuc_files), 'run kraken over everything' ); + is_deeply( [ $qc_report_obj->_tmp_directory . '/query_1.kraken', $qc_report_obj->_tmp_directory . '/query_2.kraken' ], + $kraken_files, 'check kraken files are created from nuc files' ); + + ok(my $kraken_report_files = $qc_report_obj->_run_kraken_report_on_kraken_files( $kraken_files ), 'build reports'); + is_deeply( [ $qc_report_obj->_tmp_directory . 
'/query_1.kraken.report', $qc_report_obj->_tmp_directory . '/query_2.kraken.report' ], + $kraken_report_files, 'check kraken report files are created from kraken files' ); + + is_deeply([['query_1','Staphylococcus', 'aureus'],['query_2','Staphylococcus', 'aureus']],$qc_report_obj->_parse_kraken_reports($kraken_report_files),'check output report'); +} + +done_testing(); + + + + + + + + + + + + + + + -unlink( 'kraken_report.csv' ); -done_testing(); \ No newline at end of file diff --git a/t/data/expected_query_1.fna b/t/data/expected_query_1.fna new file mode 100644 index 0000000..70ca9a0 --- /dev/null +++ b/t/data/expected_query_1.fna @@ -0,0 +1,252 @@ +>abc|SC|contig000001 +ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT +ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA +TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT +CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC +TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC +TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT +AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC +AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC +ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT +AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC +AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT +TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA +CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC +AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC +TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG +TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT +TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT +AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT +GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT +CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG +CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG 
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA +AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA +AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT +GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT +AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT +AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT +AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT +CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA +GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT +AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA +AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA +AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA +GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA +TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA +TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA +ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT +ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG +GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG +CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA +ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA +TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA +GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT +TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA +CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC +TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT +TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT +TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA +GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG +TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT +TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG +CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC +CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT 
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT +AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG +TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA +TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA +TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC +ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC +TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT +ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT +TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC +TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC +ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC +AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA +AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC +TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA +CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT +GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA +AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT +GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA +TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT +AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT +CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA +TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT +TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC +TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT +AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT +TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC +AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC +CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT +TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT +GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT +TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC +GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT 
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC +GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG +TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT +TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA +AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG +TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT +TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA +CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA +TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT +GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA +GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA +AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT +GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA +GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA +TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA +AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC +ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT +GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA +CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG +CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA +TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA +TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT +GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG +TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC +ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA +GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT +TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG +CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA +AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT +TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA +TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT +AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG 
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA +AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC +AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC +AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT +ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT +AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA +GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA +TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA +TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA +AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT +TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG +TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA +TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT +TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA +TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG +TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT +AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG +GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG +TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT +AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC +ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT +TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG +TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA +AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT +TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG +GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC +AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT +AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC +TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC +ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT +CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT +CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA 
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA +AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT +ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT +GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA +ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA +GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT +AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT +TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT +GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG +TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT +AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC +TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA +GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT +TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG +AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC +ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA +CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA +CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT +AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA +AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA +AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG +TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC +ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT +TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC +AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA +AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT +CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT +CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT +AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA +AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA +ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC +AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC 
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT +GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT +ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA +AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA +TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA +TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC +CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA +AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG +AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA +AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC +AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA +AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA +TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT +TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC +GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA +CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC +GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG +TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA +AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT +TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT +TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA +ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT +GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA +ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC +GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA +TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG +GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC +AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT +TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT +GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT +AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA +TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA 
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA +ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG +AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG +TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA +CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA +CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT +CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA +TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC +TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA +ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC +GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA +CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC +ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA +AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG +CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT +TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT +TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT +CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC +CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA +TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT +TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA +ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA +ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT +CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT +GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG +GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA +TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG +CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG +GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT +CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT +AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT +AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT 
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT +ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG +CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT +CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT +GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA +TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG diff --git a/t/data/expected_query_2.fna b/t/data/expected_query_2.fna new file mode 100644 index 0000000..70ca9a0 --- /dev/null +++ b/t/data/expected_query_2.fna @@ -0,0 +1,252 @@ +>abc|SC|contig000001 +ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT +ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA +TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT +CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC +TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC +TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT +AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC +AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC +ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT +AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC +AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT +TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA +CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC +AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC +TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG +TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT +TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT +AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT +GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT +CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG +CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG +ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA 
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA +AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT +GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT +AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT +AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT +AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT +CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA +GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT +AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA +AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA +AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA +GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA +TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA +TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA +ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT +ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG +GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG +CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA +ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA +TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA +GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT +TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA +CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC +TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT +TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT +TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA +GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG +TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT +TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG +CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC +CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT +TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT 
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG +TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA +TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA +TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC +ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC +TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT +ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT +TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC +TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC +ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC +AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA +AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC +TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA +CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT +GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA +AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT +GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA +TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT +AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT +CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA +TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT +TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC +TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT +AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT +TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC +AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC +CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT +TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT +GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT +TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC +GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT +ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC 
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG +TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT +TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA +AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG +TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT +TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA +CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA +TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT +GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA +GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA +AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT +GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA +GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA +TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA +AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC +ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT +GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA +CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG +CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA +TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA +TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT +GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG +TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC +ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA +GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT +TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG +CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA +AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT +TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA +TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT +AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG +TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA 
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC +AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC +AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT +ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT +AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA +GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA +TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA +TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA +AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT +TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG +TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA +TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT +TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA +TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG +TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT +AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG +GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG +TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT +AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC +ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT +TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG +TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA +AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT +TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG +GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC +AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT +AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC +TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC +ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT +CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT +CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA +ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA 
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT +ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT +GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA +ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA +GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT +AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT +TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT +GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG +TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT +AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC +TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA +GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT +TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG +AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC +ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA +CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA +CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT +AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA +AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA +AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG +TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC +ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT +TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC +AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA +AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT +CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT +CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT +AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA +AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA +ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC +AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC +TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT 
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT +ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA +AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA +TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA +TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC +CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA +AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG +AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA +AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC +AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA +AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA +TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT +TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC +GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA +CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC +GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG +TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA +AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT +TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT +TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA +ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT +GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA +ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC +GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA +TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG +GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC +AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT +TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT +GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT +AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA +TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA +TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA 
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG +AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG +TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA +CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA +CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT +CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA +TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC +TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA +ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC +GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA +CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC +ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA +AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG +CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT +TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT +TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT +CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC +CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA +TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT +TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA +ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA +ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT +CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT +GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG +GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA +TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG +CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG +GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT +CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT +AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT +AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT +ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT 
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG +CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT +CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT +GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA +TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG diff --git a/t/data/kraken_test/database.idx b/t/data/kraken_test/database.idx new file mode 100644 index 0000000000000000000000000000000000000000..79a245164a8529fe2dbaef6121da2b6b364079e3 GIT binary patch literal 8208 zcmeH{J!=9%7=*7dX=1=4rDc9Vu&_6UV6M?LIZ}(J5M$v-AS3|=V?ZPyn%^6aeX50* z1#yMw*gQD1w|h6dJLzpLZDf^`#QPQa@ADM>%wQ4y++Yd)BET}f&r9^H04w-DtLWDT z>*zNIo9Hcrcj%eHd-QgI9ekf%^bj9dXR(KGv5&uNKC^p$HAek9AR3&r4`AM zwT*WoCHm$Ld$yv<{f_87sXl1MfuMC4c%6gu^>)H@UQa(EO+To#;nlBS;&uK~F86iG zxz1BZr9|d`Ms_EKMLCl0ZjZbBu8^nsx?LF`{5kQEp84d5GT%kz-%xtp-#g=YxcA53 zRp%H^cBhW&2R{?6;Bn2R!i!^0yCMbM-cxc3-uk79#bdWDbFSt^7eMQtd)ka4F>0 z-`WxNPAQWssa#IFyrc6?lRhvi;Mo_ewJId}nAUUb!glnh+n+RKj;|mdOG>|2tftt9 z%I+VWtU>%7_1b-QMT3Mg|sF;4@ntjt*w!7$zZomk9xlSq>h+7}_fczt(a*$VZzN@r}a&%y(>kbP$vYigSmF@E(pDYSg z@8LKGetbMKrbBam02Vk;JCVj(Y!5a(`}r^a$!reep{C zkV&4I@t7l@33i@(_<8xt;7#v)Z+qSv=kmyP@n0UV>+?m@*Df0bSzf0C&*VNVrLoIB zm`_wgj@cc;=OamBR4L7gh0%*fln42XmhKMqKl>|I&Cchgr4zS%lyvb48-^9u- zS02l&8)1(sib_t?9c}g`Mb6Lfwn1Fu$x-^@W=|jXVqT-)3+l9Qe|SYh+W((By!uXA z+!l4h8zOR2_-(tN|73V8kA`Dai%9a0tZ~ILHyA)~N zC&zsuU#c4?(G;M~&{O5MBHi8!kB8BB@$w=cQgFPvS=po`IHEY_X=Y>-DM21MmJb)! 
zq$YW>l}A>z`xewJzHczjkg3^ui218iT&Z29NXgzwjxhalkYTA?h_+ z*q!rYFLl8lnFXGM@rkHft%O~0(KmckWVaU{uh~D&*HARKcu+$`Qxv%GPvmWa#& zW)5h6A+EYiL9C!wZrf>zjOKwqwwXf`vtbrIq4gp~t71^_(2db7yDnuyt+8zHeuU&9 z!8x_*CYcUHq3`O=JBbY10w-)YPY!!g9UGtNGFv*~`8sI`*OZzJ_u-j%ZY)0%1#b(d zi09okV(?^+y(65a=EFVoOwDwgt_p^IaRkvB`YbjX=Q*k<Hg&L>WeiDMg~Hk3DUxU4x4G|!`1 zzrW3zx5ayLSsg2px<=4hL(qycT6BDv{a(9npLrU12C>e_0Idmf@Gq)bzd&0}n z{JpB<(90rG_KSa6tS8Phn-&~#wT%XGo{``cHue(mYi*!R@^1w0+wzds-FFFt(5u>B zpGqf6fxq0;D0M~h1vn2Q?1GmtgaG?@lS_&Gp%?prxa80t@~43 zX8U4J0A3x==~4eoXo8(SzMSv>Qr-htL(f>!lZgU0Xpf{f&G*1ws4s~j?3p3JT?Sc9 zHNSlcu7R!=q&$sY?1tm>7B6ORbA#djljesh*@J1IjXdTtO!-C*bCp&{5LvoWh<6TY zEi5iIfX|H`@{TLBdeG0Mg%4GB%OK}$k!;OcqYQlb0I?mEPy-;R^M2Os4!3rg@$G@0 z>l@7f1#La-xhb>A2>3~SHfs0G4EPNe!ZK+WJRiJ=CVcy99n?nFO-0kVX5duVeSODX z_CD-dJ=ud^y15R%35uZbmWfPT7ktK6!*98WMGNd=` zOSH7V1nx#b0ouK*${l9^f*-+?P-=n4mRMUwoZ*63$h3@6C(YpDJnMzUuNuEa;hiRI tX{KN_OUOT<`s@C2Y=>XBGwX{3Q~{{!xnY`g#f literal 0 HcmV?d00001 diff --git a/t/data/kraken_test/database.kdb b/t/data/kraken_test/database.kdb new file mode 100644 index 0000000000000000000000000000000000000000..14a89d0b02e59089f882389c84043b82ca66e83d GIT binary patch literal 2872 zcmXYzdsGwG7RHB62q7d)fS?hOP(TrgBq2{=OoC!1m4X7Twu*1C^;Kxlf}kMKD-{GP zB3>VWk1Jj-C{(H-u}%me+CX@6(N2=ltfZZ+~Z>z4s*f~L==O(a!iQ%(-v4}5J6w_%OqGreI1 z7w9eyXO8OzHwl*WxRx^E&?7-d~~fjY=9WT4Qq#*gi-OnTz*bsJ(O!XBrv39K6x57VF;HQ!$j^7aXv`X44xdQQxL<;i9m8Y~ z4o%iV0nR$jeuv@(qKH@SPE5%G$tSP5r`gY&2>Q=>)mtyWV0?v5w$pylv+GtHZ69Sa zmIM`j2E~`Zc7HsnarsDgVp&dQ+xJN&x}~9Q$uB}vhneeYD#9DM*`9~j+7ze{wR-v@ z{fitfyCUmR0$;9`N(SQBg+CALh}HsPh7Au;m}}WvVivcL*h>xQk~J=MM_VdZglzc8ciW4$c(=#yi~ja{(@-FizHwVG$o4rCbT02v8I@Jx#dxY3 za?a@#z8Fakr%0(TOq5nUqCCW3uw+jd=lpMI4J+5La?sk&J@(X&xUe#5s&p%bb-vp7 z`ZZHW|L}rr*v!3u6& zmtK;~{}NldVRnOBl=#!z!(*#XIfQR4skJ?;<+S}|Uz`5wrb)gn@_dr;;lJqAuI^J`me8+lz!Kty1MT+=Z*Z3Co0)kP$`kqZ zBdk$Hap_r_v)$h0sJXd4c944lDOxw&;_b&;#A^z8NtyQ7M^rYZ6V$ZK#l2Yw5jrjW 
z&i*Kc)>Riu$N8Y@@53e%c`?r683V+z*9Gaegi~j}=5AAfHby5!VnKCb$a#m=#&496#tX+j|LRfWW`yS)!gL|W?)%p+n#%bLGTXddfQ5-f;~X} z-&NffEzy?({|1iyrsiQ3m_5~r@tJOmwF~Y~55MFi3?+b^*@Bo%T{eq^@5@7!=tQ}d z6z5NQMf&@<-hi_-_dB%vEz^K|S>`E=$O!t_C5zvTCB6l0g0`-Kq?6WoESDNFRMjH` z{$(|hFjG4U80!2U8GWk-eT1fE^m=I~57*ZcG@{H_E$CO`tI>jes*~WXd=Yjj5LSZR zo+jiV4$m#%EaJf!rp|Q;c)mqLnI;Vt=vQsr1=FeX-(8VE>sYG-bwIC|DhFcC>uC5{ zO`Y=2YqL6_cU|Oz74DUR9vxSd@zoay;N2lwZtarUKM>;jTOsA659-{3-k>>}(LC1+ zJU6x|o3#XIkb_l{>{m{CoHwq2qY08p{tE$5Y9Ca*G8O@Ir3j8_oyf8Wv%w945EeqW z#q|+iClx_F0r0X$OHw4sBf^DJ?sV{JF&G};4>NQ3g=(XIw%NiQZ2lFtS9uj%I zeg5tq^cLAoDFgUOy^^Y}jZ~1EDc#NAQh5jDz}B>u!A;0A+;e@%3T$q2+#hGM(MA`I zWdeW7D>#%jGX(Bv57$-0GhC1lCe@A;sR~dNqU8!|Q4yEpv8eGFEFMR}Y*SGP)~Pih z*4&;AWf<8YRsgABi%c4T>$TEQt|=`Ay!&oif-H$m1-~zv43kWYA|MAXj1f@O3iwCW wPsLEV7I3dYv@0r5V*>X=`7nZEN)U_5=ui}@2K{Z%pd%)w2iX7cR~Ib*0GqCCyZ`_I literal 0 HcmV?d00001 diff --git a/t/data/kraken_test/taxonomy/names.dmp b/t/data/kraken_test/taxonomy/names.dmp new file mode 100644 index 0000000..120d91a --- /dev/null +++ b/t/data/kraken_test/taxonomy/names.dmp @@ -0,0 +1,77 @@ +1 | all | | synonym | +1 | root | | scientific name | +2 | Bacteria | Bacteria | scientific name | +2 | Monera | Monera | in-part | +2 | Procaryotae | Procaryotae | in-part | +2 | Prokaryota | Prokaryota | in-part | +2 | Prokaryotae | Prokaryotae | in-part | +2 | bacteria | bacteria | blast name | +2 | eubacteria | | genbank common name | +2 | not Bacteria Haeckel 1894 | | synonym | +2 | prokaryote | prokaryote | in-part | +2 | prokaryotes | prokaryotes | in-part | +1239 | Bacillus/Clostridium group | | synonym | +1239 | Clostridium group firmicutes | | synonym | +1239 | Firmacutes | | synonym | +1239 | Firmicutes | | scientific name | +1239 | Firmicutes corrig. 
Gibbons and Murray 1978 | | authority | +1239 | Gram positive bacteria | | misspelling | +1239 | Gram-positive bacteria | | genbank common name | +1239 | Low G+C firmicutes | | synonym | +1239 | clostridial firmicutes | | synonym | +1239 | firmicutes | firmicutes | blast name | +1239 | low G+C Gram-positive bacteria | | common name | +1239 | low GC Gram+ | low GC gram-positives | common name | +1279 | "Aurococcus" Winslow and Rogers 1906 | | authority | +1279 | Aurococcus | | synonym | +1279 | Staphylococcus | | scientific name | +1279 | Staphylococcus Rosenbach 1884 | | authority | +1280 | "Micrococcus aureus" (Rosenbach 1884) Zopf 1885 | | authority | +1280 | "Micrococcus pyogenes" Lehmann and Neumann 1896 | | authority | +1280 | "Staphlococcus pyogenes citreus" Passet 1885 | | authority | +1280 | "Staphylococcus pyogenes aureus" Rosenbach 1884 | | authority | +1280 | ATCC 12600 | | type material | +1280 | ATCC 12600-U | | type material | +1280 | CCM 885 | | type material | +1280 | CCUG 1800 | | type material | +1280 | CIP 65.8 | | type material | +1280 | DSM 20231 | | type material | +1280 | HAMBI 66 | | type material | +1280 | JCM 20624 | | type material | +1280 | Micrococcus aureus | | synonym | +1280 | Micrococcus pyogenes | | synonym | +1280 | NBRC 100910 | | type material | +1280 | NCAIM B.01065 | | type material | +1280 | NCCB 72047 | | type material | +1280 | NCTC 8532 | | type material | +1280 | Staphilococcus aureus | | misspelling | +1280 | Staphlococcus pyogenes citreus | | synonym | +1280 | Staphylococcus aureus | | scientific name | +1280 | Staphylococcus aureus Rosenbach 1884 | | authority | +1280 | Staphylococcus pyogenes aureus | | synonym | +1280 | Staphylococus aureus | | misspelling | +1280 | Streptococcus aureus | | misnomer | +1385 | Bacillales | | scientific name | +1385 | Bacillales Prevot 1953 | | authority | +1385 | Bacillus/Staphylococcus group | | synonym | +46170 | Staphylococcus aureus aureus | | equivalent name | +46170 | 
Staphylococcus aureus subsp. aureus | | scientific name | +46170 | Staphylococcus aureus subsp. aureus Rosenbach 1884 | | authority | +90964 | Staphylococcaceae | | scientific name | +90964 | Staphylococcaceae Schleifer and Bell 2010 | | authority | +90964 | Staphylococceae | | includes | +90964 | Staphylococceae Prevot 1940 | | includes | +90964 | Staphylococcus group | | synonym | +91061 | Bacilli | | scientific name | +91061 | Bacilli Ludwig et al. 2010 | | authority | +91061 | Bacillus/Lactobacillus/Streptococcus group | | synonym | +91061 | Firmibacteria | | synonym | +91061 | Firmibacteria Murray 1988 | | authority | +131567 | biota | | synonym | +131567 | cellular organisms | | scientific name | +663951 | Staphylococcus aureus subsp. aureus 0528 | | misspelling | +663951 | Staphylococcus aureus subsp. aureus 0582 | | synonym | +663951 | Staphylococcus aureus subsp. aureus TW20 | | scientific name | +663951 | Staphylococcus aureus subsp. aureus str. TW20 | | equivalent name | +663951 | Staphylococcus aureus subsp. 
aureus strain TW20 | | equivalent name | +2000000000 | Staphylococcus aureus | | scientific name | diff --git a/t/data/kraken_test/taxonomy/nodes.dmp b/t/data/kraken_test/taxonomy/nodes.dmp new file mode 100644 index 0000000..77545e1 --- /dev/null +++ b/t/data/kraken_test/taxonomy/nodes.dmp @@ -0,0 +1,12 @@ +1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | +2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | +1239 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1279 | 90964 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1280 | 1279 | species | SA | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +1385 | 91061 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +46170 | 1280 | subspecies | SA | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +90964 | 1385 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +91061 | 1239 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | +663951 | 46170 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +2000000000 | 663951 | no rank | HI | 9 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | From 18d9abbc19d463e814dd6431838d44e11fe0e634 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 20 May 2015 15:48:41 +0100 Subject: [PATCH 2/9] suppress output --- lib/Bio/Roary/QC/Report.pm | 2 +- t/Bio/Roary/QC/Report.t | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Bio/Roary/QC/Report.pm b/lib/Bio/Roary/QC/Report.pm index fc7459e..e2101c6 100644 --- a/lib/Bio/Roary/QC/Report.pm +++ b/lib/Bio/Roary/QC/Report.pm @@ -69,7 +69,7 @@ sub _kraken_cmd { my $kcmd = $self->kraken_exec . " --fasta-input ". " --db " . $self->kraken_db . 
- " --output $kraken_output $a"; + " --output $kraken_output $a > /dev/null 2>&1"; return $kcmd; } diff --git a/t/Bio/Roary/QC/Report.t b/t/Bio/Roary/QC/Report.t index c3c5d44..3732066 100755 --- a/t/Bio/Roary/QC/Report.t +++ b/t/Bio/Roary/QC/Report.t @@ -44,7 +44,7 @@ ok( outfile => "kraken_report.csv", job_runner => "Local", kraken_db => 't/data/kraken_test/', - verbose => 1, + verbose => 0, ), 'QC report object created with data' ); @@ -86,7 +86,7 @@ SKIP: is_deeply( [ $qc_report_obj->_tmp_directory . '/query_1.kraken.report', $qc_report_obj->_tmp_directory . '/query_2.kraken.report' ], $kraken_report_files, 'check kraken report files are created from kraken files' ); - is_deeply([['query_1','Staphylococcus', 'aureus'],['query_2','Staphylococcus', 'aureus']],$qc_report_obj->_parse_kraken_reports($kraken_report_files),'check output report'); + is_deeply([['query_1','Staphylococcus', 'Staphylococcus aureus'],['query_2','Staphylococcus', 'Staphylococcus aureus']],$qc_report_obj->_parse_kraken_reports($kraken_report_files),'check output report'); } done_testing(); From b4fa74128ad15211ba1b5baf0c23d998504a0261 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 20 May 2015 15:53:39 +0100 Subject: [PATCH 3/9] check report file --- t/Bio/Roary/QC/Report.t | 7 +++++++ t/data/exp_qc_report_real.csv | 3 +++ 2 files changed, 10 insertions(+) create mode 100644 t/data/exp_qc_report_real.csv diff --git a/t/Bio/Roary/QC/Report.t b/t/Bio/Roary/QC/Report.t index 3732066..6b1da76 100755 --- a/t/Bio/Roary/QC/Report.t +++ b/t/Bio/Roary/QC/Report.t @@ -87,6 +87,13 @@ SKIP: $kraken_report_files, 'check kraken report files are created from kraken files' ); is_deeply([['query_1','Staphylococcus', 'Staphylococcus aureus'],['query_2','Staphylococcus', 'Staphylococcus aureus']],$qc_report_obj->_parse_kraken_reports($kraken_report_files),'check output report'); + + + ok( $qc_report_obj->report, 'report generated with real data' ); + ok( -e 'kraken_report.csv', 'report file exists with 
real data' ); + is( read_file('kraken_report.csv'), read_file("t/data/exp_qc_report_real.csv"), 'report file correct' ); + unlink('kraken_report.csv'); + } done_testing(); diff --git a/t/data/exp_qc_report_real.csv b/t/data/exp_qc_report_real.csv new file mode 100644 index 0000000..3d18a94 --- /dev/null +++ b/t/data/exp_qc_report_real.csv @@ -0,0 +1,3 @@ +Sample,Genus,Species +query_1,Staphylococcus,Staphylococcus aureus +query_2,Staphylococcus,Staphylococcus aureus From d4f1b677ddb3975b70e4e1b6aeb701873f3671be Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 20 May 2015 15:57:35 +0100 Subject: [PATCH 4/9] cleanup as you go along --- lib/Bio/Roary/QC/Report.pm | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/Bio/Roary/QC/Report.pm b/lib/Bio/Roary/QC/Report.pm index e2101c6..a23b749 100644 --- a/lib/Bio/Roary/QC/Report.pm +++ b/lib/Bio/Roary/QC/Report.pm @@ -109,6 +109,12 @@ sub _run_kraken_on_nuc_files cpus => $self->cpus ); $kraken_runner_obj->run(); + + for my $filename(@{$nuc_files}) + { + unlink($filename); + } + return \@kraken_output_files; } @@ -138,6 +144,10 @@ sub _run_kraken_report_on_kraken_files cpus => $self->cpus ); $kraken_runner_obj->run(); + for my $filename(@{$kraken_files}) + { + unlink($filename); + } return \@kraken_report_output_files; } From 7e81b1309d51ce68c5460924369d46e3b6de2734 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 20 May 2015 16:12:01 +0100 Subject: [PATCH 5/9] cleanup report files --- lib/Bio/Roary/QC/Report.pm | 2 ++ t/Bio/Roary/QC/Report.t | 1 + 2 files changed, 3 insertions(+) diff --git a/lib/Bio/Roary/QC/Report.pm b/lib/Bio/Roary/QC/Report.pm index a23b749..0c8b859 100644 --- a/lib/Bio/Roary/QC/Report.pm +++ b/lib/Bio/Roary/QC/Report.pm @@ -168,7 +168,9 @@ sub _parse_kraken_reports for my $kraken_report(@{$kraken_report_files}) { push(@report_rows, $self->_parse_kraken_report($kraken_report)); + unlink($kraken_report); } + return \@report_rows; } diff --git a/t/Bio/Roary/QC/Report.t 
b/t/Bio/Roary/QC/Report.t index 6b1da76..e4741ec 100755 --- a/t/Bio/Roary/QC/Report.t +++ b/t/Bio/Roary/QC/Report.t @@ -96,6 +96,7 @@ SKIP: } + done_testing(); From 7228b407a7e4d9d75e7862c21183154bd2c32d44 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 20 May 2015 16:12:14 +0100 Subject: [PATCH 6/9] remove unneeded tests --- t/Bio/Roary/QC/Kraken.t | 30 ----------------- t/Bio/Roary/QC/ShredAssemblies.t | 57 -------------------------------- 2 files changed, 87 deletions(-) delete mode 100644 t/Bio/Roary/QC/Kraken.t delete mode 100644 t/Bio/Roary/QC/ShredAssemblies.t diff --git a/t/Bio/Roary/QC/Kraken.t b/t/Bio/Roary/QC/Kraken.t deleted file mode 100644 index 81b8ee4..0000000 --- a/t/Bio/Roary/QC/Kraken.t +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env perl -use strict; -use warnings; - -BEGIN { unshift( @INC, './lib' ) } - -BEGIN { - use Test::Most; - use_ok('Bio::Roary::QC::Kraken'); -} - -ok( - my $kraken_obj = Bio::Roary::QC::Kraken->new( - assembly_directory => "t/data/kraken", - glob_search => "*.test.fa", - job_runner => "Local" - ), - 'kraken object created' -); - -my $exp = "kraken --db /lustre/scratch108/pathogen/pathpipe/kraken/minikraken_20140330/ --output test.kraken test.fa"; -is( $kraken_obj->_kraken_cmd( 'test.fa', 'test.kraken' ), $exp, 'kraken command correct' ); - -$exp = $exp = "kraken-report --db /lustre/scratch108/pathogen/pathpipe/kraken/minikraken_20140330/ test.kraken > test.kraken_report"; -is( $kraken_obj->_kraken_report_cmd( 'test.kraken', 'test.kraken_report' ), $exp, 'kraken-report command correct' ); - -$exp = [ 'Brucella', 'Brucella ceti' ]; -is_deeply( $kraken_obj->_parse_kraken_report( "t/data/kraken_report.txt" ), $exp, 'kraken report parsed fine' ); - -done_testing(); \ No newline at end of file diff --git a/t/Bio/Roary/QC/ShredAssemblies.t b/t/Bio/Roary/QC/ShredAssemblies.t deleted file mode 100644 index 6e575cd..0000000 --- a/t/Bio/Roary/QC/ShredAssemblies.t +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env perl -use strict; 
-use warnings; -use Data::Dumper; -use File::Slurp::Tiny qw(read_file write_file); -use File::Temp; -use Cwd; - -BEGIN { unshift( @INC, './lib' ) } - -BEGIN { - use Test::Most; - use_ok('Bio::Roary::QC::ShredAssemblies'); -} - -my $temp_directory_obj = File::Temp->newdir(DIR => getcwd, CLEANUP => 1 ); -my $tmp = $temp_directory_obj->dirname(); - -my $shred_obj; -ok( - $shred_obj = Bio::Roary::QC::ShredAssemblies->new( - gff_files => ['t/data/shred1.gff', 't/data/shred2.gff'], - read_size => 10, - output_directory => $tmp, - job_runner => "Local" - ), - 'shredding object created' -); -ok( $shred_obj->shred, 'data shredded' ); -ok( -e "$tmp/shred1.shred.fa", 'output file exists' ); -ok( -e "$tmp/shred2.shred.fa", 'output file exists' ); - -is( - read_file('t/data/shred1.shred.fa'), - read_file("$tmp/shred1.shred.fa"), - 'shredded file correct' -); -is( - read_file('t/data/shred2.shred.fa'), - read_file("$tmp/shred2.shred.fa"), - 'shredded file correct' -); - -my $exp = [ "AAAAA", "TTTTT", "CCCCC", "GGGGG" ]; -ok( - $shred_obj = Bio::Roary::QC::ShredAssemblies->new( - gff_files => ['t/data/shred1.fa', 't/data/shred2.fa'], - read_size => 5, - output_directory => $tmp, - job_runner => "Local" - ), - 'shredding object created' -); -my $got = $shred_obj->_shredded_seq("AAAAATTTTTCCCCCGGGGG "); -is_deeply $got, $exp, 'shredding correct'; - -done_testing(); \ No newline at end of file From 6a9187a412d0c44458797fd37ea32f37e19fa75a Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Wed, 20 May 2015 16:23:14 +0100 Subject: [PATCH 7/9] close FH --- lib/Bio/Roary/QC/Report.pm | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/Bio/Roary/QC/Report.pm b/lib/Bio/Roary/QC/Report.pm index 0c8b859..f5c0917 100644 --- a/lib/Bio/Roary/QC/Report.pm +++ b/lib/Bio/Roary/QC/Report.pm @@ -168,6 +168,10 @@ sub _parse_kraken_reports for my $kraken_report(@{$kraken_report_files}) { push(@report_rows, $self->_parse_kraken_report($kraken_report)); + } + + for my 
$kraken_report(@{$kraken_report_files}) + { unlink($kraken_report); } @@ -178,7 +182,7 @@ sub _parse_kraken_report { my ( $self, $kraken_report ) = @_; # parse report - open( REPORT, '<', $kraken_report ); + open( my $report_fh, '<', $kraken_report ); my $sample_name = $kraken_report; $sample_name =~ s/.report$//; @@ -186,7 +190,7 @@ sub _parse_kraken_report { my($sample_base_name, $dirs, $suffix) = fileparse($sample_name); my ( $top_genus, $top_species ); - while ( <REPORT> ){ + while ( <$report_fh> ){ my @parts = split( "\t" ); chomp @parts; @@ -195,6 +199,7 @@ sub _parse_kraken_report { last if (defined $top_genus && defined $top_species); } + close($report_fh); $top_genus ||= "not_found"; $top_genus =~ s/^\s+//g; From fa8c16bf36bf99ef7408696b9c20c34cae929766 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 10:08:12 +0100 Subject: [PATCH 8/9] pass through kraken db --- README.md | 13 ++++++++++++- dist.ini | 7 ++++++- lib/Bio/Roary/CommandLine/CreatePanGenome.pm | 5 ++++- lib/Bio/Roary/CommandLine/Roary.pm | 10 +++++++--- lib/Bio/Roary/QC/Report.pm | 6 +++--- t/Bio/Roary/QC/Report.t | 17 +---------------- 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 41ad5fe..3a45f23 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,6 @@ ls Roary-* ``` ###Add to your Environment - Add the following lines to your $HOME/.bashrc file, or to /etc/profile.d/roary.sh to make it available to all users: ``` @@ -55,3 +54,15 @@ export PERL5LIB=$PERL5LIB:$HOME/Roary-x.x.x/lib ``` cpanm Array::Utils BioPerl Exception::Class File::Find::Rule File::Grep File::Slurp::Tiny Graph Moose Moose::Role Text::CSV Log::Log4perl File::Which ``` + +#When things go wrong +###cdhit seg faults +Old versions of cdhit have a bug, so you need to use at least version 4.6.1. The cdhit packages for Ubuntu 12.04 seem to be affected, so [installing from the source](http://cd-hit.org/) is the only option.
+ +###I installed the homebrew Kraken package and now there's an error when I run the tests or QC +There's a bug and you'll need to [install it from source](https://ccb.jhu.edu/software/kraken/) on older versions of OSX (like Mountain Lion). + +###Why don't you bundle a Kraken database for the QC? +It's massive (2.7GB) and changes as RefSeq is updated. The [authors](https://ccb.jhu.edu/software/kraken/) have prebuilt databases and details about how to make your own. + + diff --git a/dist.ini b/dist.ini index 8ecd975..0762d92 100644 --- a/dist.ini +++ b/dist.ini @@ -1,5 +1,5 @@ name = Bio-Roary -version = 2.2.1 +version = 2.2.2 author = Andrew J. Page license = GPL_3 copyright_holder = Wellcome Trust Sanger Institute @@ -32,5 +32,10 @@ requires = parallel [Encoding] filename = t/data/expected_set_difference_common_set_plot.png filename = t/data/expected_set_difference_unique_set_two_plot.png +filename = t/data/kraken_test/database.idx +filename = t/data/kraken_test/database.jdb +filename = t/data/kraken_test/database.kdb +filename = t/data/kraken_test/taxonomy/names.dmp +filename = t/data/kraken_test/taxonomy/nodes.dmp encoding = bytes diff --git a/lib/Bio/Roary/CommandLine/CreatePanGenome.pm b/lib/Bio/Roary/CommandLine/CreatePanGenome.pm index b461761..6f2053b 100644 --- a/lib/Bio/Roary/CommandLine/CreatePanGenome.pm +++ b/lib/Bio/Roary/CommandLine/CreatePanGenome.pm @@ -65,8 +65,11 @@ sub usage_text { # Include full annotation and inference in group statistics create_pan_genome --verbose_stats *.gff - # Increase the groups/clusters limit (default 50,000). Please check the QC results before running this + # Increase the groups/clusters limit (default 50,000). Please check the QC results before running this!
create_pan_genome --group_limit 60000 *.gff + + # Use a different Kraken database + roary -k /path/to/kraken_database/ *.gff # This help message create_pan_genome -h diff --git a/lib/Bio/Roary/CommandLine/Roary.pm b/lib/Bio/Roary/CommandLine/Roary.pm index ca5488e..ab872ce 100644 --- a/lib/Bio/Roary/CommandLine/Roary.pm +++ b/lib/Bio/Roary/CommandLine/Roary.pm @@ -40,6 +40,7 @@ has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 ); has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 ); has 'core_definition' => ( is => 'rw', isa => 'Num', default => 1 ); has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); +has 'kraken_db' => ( is => 'rw', isa => 'Str', default => '/lustre/scratch108/pathogen/pathpipe/kraken/minikraken_20140330/' ); has 'run_qc' => ( is => 'rw', isa => 'Bool', default => 0 ); @@ -51,7 +52,7 @@ sub BUILD { $max_threads, $dont_delete_files, $dont_split_groups, $perc_identity, $output_filename, $job_runner, $makeblastdb_exec, $mcxdeblast_exec, $mcl_exec, $blastp_exec, $apply_unknowns_filter, $cpus, $output_multifasta_files, $verbose_stats, $translation_table, - $run_qc, $core_definition, $help + $run_qc, $core_definition, $help, $kraken_db, ); GetOptionsFromArray( @@ -76,6 +77,7 @@ sub BUILD { 'dont_run_qc' => \$dont_run_qc, 'cd|core_definition=i' => \$core_definition, 'v|verbose' => \$verbose, + 'k|kraken_db=s' => \$kraken_db, 'h|help' => \$help, ); @@ -119,6 +121,7 @@ sub BUILD { $self->verbose_stats($verbose_stats) if ( defined $verbose_stats ); $self->translation_table($translation_table) if ( defined($translation_table) ); $self->group_limit($group_limit) if ( defined($group_limit) ); + $self->kraken_db($kraken_db) if ( defined($kraken_db) ); if ( defined($run_qc) ) { @@ -167,7 +170,8 @@ sub run { input_files => $self->fasta_files, job_runner => $self->job_runner, cpus => $self->cpus, - verbose => $self->verbose + verbose => $self->verbose, + kraken_db => $self->kraken_db ); $qc_input_files->report; } @@ 
-238,7 +242,7 @@ sub usage_text { # Generate QC report detailing top genus and species for each assembly # Requires Kraken to be installed - roary -qc *.gff + roary -k /path/to/kraken_database/ -qc *.gff # This help message roary -h diff --git a/lib/Bio/Roary/QC/Report.pm b/lib/Bio/Roary/QC/Report.pm index f5c0917..72119ae 100644 --- a/lib/Bio/Roary/QC/Report.pm +++ b/lib/Bio/Roary/QC/Report.pm @@ -16,14 +16,14 @@ with 'Bio::Roary::JobRunner::Role'; has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); has 'kraken_exec' => ( is => 'ro', isa => 'Str', default => 'kraken' ); has 'kraken_report_exec' => ( is => 'ro', isa => 'Str', default => 'kraken-report' ); -has 'kraken_db' => ( is => 'ro', isa => 'Str', default => '/lustre/scratch108/pathogen/pathpipe/kraken/minikraken_20140330/' ); +has 'kraken_db' => ( is => 'ro', isa => 'Str', required => 1 ); has 'outfile' => ( is => 'rw', isa => 'Str', default => 'qc_report.csv' ); has '_kraken_data' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 ); has '_header' => ( is => 'rw', isa => 'Str', lazy_build => 1 ); has 'kraken_memory' => ( is => 'rw', isa => 'Int', default => 2000 ); has '_tmp_directory_obj' => ( is => 'rw', lazy_build => 1 ); -has '_tmp_directory' => ( is => 'rw', lazy_build => 1, isa => 'Str', ); +has '_tmp_directory' => ( is => 'rw', lazy_build => 1, isa => 'Str', ); sub _nuc_fasta_filename @@ -173,7 +173,7 @@ sub _parse_kraken_reports for my $kraken_report(@{$kraken_report_files}) { unlink($kraken_report); - } + } return \@report_rows; } diff --git a/t/Bio/Roary/QC/Report.t b/t/Bio/Roary/QC/Report.t index e4741ec..bb04600 100755 --- a/t/Bio/Roary/QC/Report.t +++ b/t/Bio/Roary/QC/Report.t @@ -25,6 +25,7 @@ ok( input_files => [], outfile => "kraken_report.csv", _kraken_data => $kraken_data, + kraken_db => 't/data/kraken_test/', job_runner => "Local" ), 'QC report object created with no input gff files' @@ -99,19 +100,3 @@ SKIP: done_testing(); - - - - - - - - - - - - - - - - From 
01cb0facc0b81342aee6a874f05080ff30cfaac5 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 10:33:48 +0100 Subject: [PATCH 9/9] make the sequence name generic --- t/Bio/Roary/CommandLine/Roary.t | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/Bio/Roary/CommandLine/Roary.t b/t/Bio/Roary/CommandLine/Roary.t index aced151..ef4dd10 100755 --- a/t/Bio/Roary/CommandLine/Roary.t +++ b/t/Bio/Roary/CommandLine/Roary.t @@ -71,7 +71,8 @@ SKIP: fasta_file => 'core_gene_alignment.aln', ), 'Check size of the core_gene_alignment.aln init'); - is($seq_len->sequence_lengths->{'11111_1#11'}, 58389, 'length of first sequence'); + my @keys = keys %{$seq_len->sequence_lengths}; + is($seq_len->sequence_lengths->{$keys[0]}, 58389, 'length of first sequence'); ok(-e 'accessory.tab'); ok(-e 'core_accessory.tab');