From 7014c661f4f6bcde5378b20a39e73f39a7b42485 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 11:37:35 +0100 Subject: [PATCH 1/6] suppress warning output from mcl --- lib/Bio/Roary/External/Mcl.pm | 2 +- t/Bio/Roary/External/Mcl.t | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Bio/Roary/External/Mcl.pm b/lib/Bio/Roary/External/Mcl.pm index f3aa353..265d469 100644 --- a/lib/Bio/Roary/External/Mcl.pm +++ b/lib/Bio/Roary/External/Mcl.pm @@ -64,7 +64,7 @@ sub _command_to_run { " ", ( $self->mcxdeblast_exec, '-m9', '--score='.$self->_score, - '--line-mode=abc', $self->blast_results, + '--line-mode=abc', $self->blast_results, '2> /dev/null', '|', $self->mcl_exec, '-', '--abc', '-I', $self->_inflation_value, '-o', $self->output_file, $self->_logging diff --git a/t/Bio/Roary/External/Mcl.t b/t/Bio/Roary/External/Mcl.t index 58af5f5..b8f1a9c 100644 --- a/t/Bio/Roary/External/Mcl.t +++ b/t/Bio/Roary/External/Mcl.t @@ -28,7 +28,7 @@ ok( is( $obj->_command_to_run, $cwd - . '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results | ' + . '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results 2> /dev/null | ' . $cwd . 
'/t/bin/dummy_mcl - --abc -I 1.5 -o output.groups > /dev/null 2>&1', 'Command constructed as expected' From 8cc5b5215bac4f8dc95efc1b7acce4e9149d2247 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 11:41:42 +0100 Subject: [PATCH 2/6] delete faa files --- lib/Bio/Roary/PostAnalysis.pm | 5 +++++ t/Bio/Roary/CommandLine/Roary.t | 35 +++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/lib/Bio/Roary/PostAnalysis.pm b/lib/Bio/Roary/PostAnalysis.pm index efcc868..f3b829e 100644 --- a/lib/Bio/Roary/PostAnalysis.pm +++ b/lib/Bio/Roary/PostAnalysis.pm @@ -218,6 +218,11 @@ sub _delete_intermediate_files my ($self) = @_; return if($self->dont_delete_files == 1); + for my $fasta_file (@{$self->fasta_files}) + { + unlink($fasta_file) if(-e $fasta_file); + } + unlink($self->_output_mcl_filename) ; unlink($self->_output_inflate_clusters_filename) ; unlink($self->_output_group_labels_filename) ; diff --git a/t/Bio/Roary/CommandLine/Roary.t b/t/Bio/Roary/CommandLine/Roary.t index ef4dd10..53a0b66 100755 --- a/t/Bio/Roary/CommandLine/Roary.t +++ b/t/Bio/Roary/CommandLine/Roary.t @@ -53,6 +53,12 @@ cleanup_files(); ); mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] ); + +# Make sure faa files are cleaned up automatically +ok(!(-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is cleaned up'); +ok(!(-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is cleaned up'); +ok(!(-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is cleaned up'); + cleanup_files(); SKIP: @@ -89,32 +95,31 @@ done_testing(); sub cleanup_files { remove_tree('pan_genome_sequences'); + unlink('_clustered'); + unlink('_clustered.bak.clstr'); + unlink('accessory.header.embl'); + unlink('accessory.header.tab'); + unlink('accessory.tab'); + unlink('blast_identity_frequency.Rtab'); unlink('clustered_proteins'); + 
unlink('core_accessory.header.embl'); + unlink('core_accessory.header.tab'); + unlink('core_accessory.tab'); + unlink('core_gene_alignment.aln'); unlink('database_masking.asnb'); unlink('example_1.faa.tmp.filtered.fa'); unlink('example_2.faa.tmp.filtered.fa'); unlink('example_3.faa.tmp.filtered.fa'); unlink('gene_presence_absence.csv'); - unlink('query_1.gff.proteome.faa'); - unlink('query_2.gff.proteome.faa'); - unlink('query_3.gff.proteome.faa'); - unlink('_clustered'); - unlink('_clustered.bak.clstr'); - unlink('pan_genome.fa'); - unlink('core_accessory.header.tab'); - unlink('accessory.header.tab'); - unlink('accessory.tab'); - unlink('core_accessory.tab'); unlink('number_of_conserved_genes.Rtab'); unlink('number_of_genes_in_pan_genome.Rtab'); unlink('number_of_new_genes.Rtab'); unlink('number_of_unique_genes.Rtab'); + unlink('pan_genome.fa'); + unlink('query_1.gff.proteome.faa'); + unlink('query_2.gff.proteome.faa'); + unlink('query_3.gff.proteome.faa'); unlink('query_5.gff.proteome.faa'); - unlink('core_gene_alignment.aln'); - unlink('blast_identity_frequency.Rtab'); unlink('real_data_1.gff.proteome.faa'); unlink('real_data_2.gff.proteome.faa'); - unlink('accessory.header.embl'); - unlink('core_accessory.header.embl'); - } \ No newline at end of file From cf316003863842e72dbad405d62208cd3fe57fe5 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 12:03:52 +0100 Subject: [PATCH 3/6] check protein files not cleaned up --- t/Bio/Roary/CommandLine/Roary.t | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/t/Bio/Roary/CommandLine/Roary.t b/t/Bio/Roary/CommandLine/Roary.t index 53a0b66..88d0570 100755 --- a/t/Bio/Roary/CommandLine/Roary.t +++ b/t/Bio/Roary/CommandLine/Roary.t @@ -59,8 +59,18 @@ ok(!(-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is c ok(!(-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is cleaned up'); ok(!(-e 'query_5.gff.proteome.faa'),'Check 
protein query_5.gff.proteome.faa is cleaned up'); + +%scripts_and_expected_files = ( +'-j Local --dont_delete_files t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' => + [ 'empty_file', 't/data/empty_file' ], + ); +mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] ); +ok((-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is not cleaned up'); +ok((-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is not cleaned up'); +ok((-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is not cleaned up'); + cleanup_files(); - + SKIP: { @@ -95,8 +105,18 @@ done_testing(); sub cleanup_files { remove_tree('pan_genome_sequences'); + unlink('_blast_results'); unlink('_clustered'); unlink('_clustered.bak.clstr'); + unlink('_clustered.clstr'); + unlink('_combined_files'); + unlink('_combined_files.groups'); + unlink('_fasta_files'); + unlink('_gff_files'); + unlink('_inflated_mcl_groups'); + unlink('_inflated_unsplit_mcl_groups'); + unlink('_labeled_mcl_groups'); + unlink('_uninflated_mcl_groups'); unlink('accessory.header.embl'); unlink('accessory.header.tab'); unlink('accessory.tab'); @@ -122,4 +142,5 @@ sub cleanup_files unlink('query_5.gff.proteome.faa'); unlink('real_data_1.gff.proteome.faa'); unlink('real_data_2.gff.proteome.faa'); + } \ No newline at end of file From 3e91966b5fcad975c3e32ffc95882b8281c3f2b7 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 12:04:16 +0100 Subject: [PATCH 4/6] split groups should use temp directory module and close filehandles --- lib/Bio/Roary/SplitGroups.pm | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/lib/Bio/Roary/SplitGroups.pm b/lib/Bio/Roary/SplitGroups.pm index de56cec..4521d13 100644 --- a/lib/Bio/Roary/SplitGroups.pm +++ b/lib/Bio/Roary/SplitGroups.pm @@ -24,11 +24,11 @@ has 'outfile' => ( is => 'ro', isa => 'Str', required => 1 ); has 
'iterations' => ( is => 'ro', isa => 'Int', default => 5 ); has 'dont_delete' => ( is => 'ro', isa => 'Bool', default => 0 ); -has '_outfile_handle' => ( is => 'ro', lazy_build => 1 ); has '_neighbourhood_size' => ( is => 'ro', isa => 'Int', default => 5 ); has '_group_filelist' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 ); -has '_tmp_dir' => ( is => 'ro', isa => 'Str', default => 'split_groups' ); +has '_tmp_dir_object' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } ); +has '_tmp_dir' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__tmp_dir' ); has '_analyse_groups_obj' => ( is => 'ro', lazy_build => 1 ); has '_genes_to_files' => ( is => 'ro', lazy_build => 1 ); @@ -45,11 +45,9 @@ has '_gene_files_temp_dir_obj' => has '_do_sorting' => ( is => 'rw', isa => 'Bool', default => 0 ); # set to 1 for testing only -sub _build__outfile_handle { - my ( $self ) = @_; - - open( my $fh, '>', $self->outfile ); - return $fh; +sub _build__tmp_dir { + my ($self) = @_; + return $self->_tmp_dir_object->dirname(); } sub _build__analyse_groups_obj { @@ -79,14 +77,6 @@ sub _build__group_filelist { return \@filelist; } -sub _make_tmp_dir { - my ( $self ) = @_; - my $dir = $self->_tmp_dir; - unless ( -e $dir ) { - make_path($dir) or die "Cannot make dir: $dir\n" ; - } -} - sub _build__genes_to_neighbourhood { my ( $self ) = @_; @@ -119,8 +109,6 @@ sub _build__genes_to_neighbourhood sub split_groups { my ( $self ) = @_; - $self->_make_tmp_dir; - # iteratively for my $x ( 0..($self->iterations - 1) ){ my ( $in_groups, $out_groups ) = $self->_get_files_for_iteration( $x ); @@ -177,8 +165,8 @@ sub _set_genes_to_groups { my %genes2groups; my $c = 0; - open( GFH, '<', $groupfile ); - while( my $line = <GFH> ){ + open( my $gfh, '<', $groupfile ); + while( my $line = <$gfh> ){ chomp $line; my @genes = split( /\s+/, $line ); for my $g ( @genes ){ @@ -186,6 +174,7 @@ sub _set_genes_to_groups { } $c++; } + 
close($gfh); $self->_genes_to_groups( \%genes2groups ); } From 826730bdf6ba77d873ecda12daf68389d4fd23e3 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 13:06:23 +0100 Subject: [PATCH 5/6] update version --- dist.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist.ini b/dist.ini index 0762d92..5309b93 100644 --- a/dist.ini +++ b/dist.ini @@ -1,5 +1,5 @@ name = Bio-Roary -version = 2.2.2 +version = 2.2.3 author = Andrew J. Page license = GPL_3 copyright_holder = Wellcome Trust Sanger Institute From df023e0df3138b1f1e736879201e957f178c8938 Mon Sep 17 00:00:00 2001 From: andrewjpage Date: Thu, 21 May 2015 13:10:19 +0100 Subject: [PATCH 6/6] no need to manually delete temp directory --- lib/Bio/Roary/SplitGroups.pm | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/Bio/Roary/SplitGroups.pm b/lib/Bio/Roary/SplitGroups.pm index 4521d13..d039c18 100644 --- a/lib/Bio/Roary/SplitGroups.pm +++ b/lib/Bio/Roary/SplitGroups.pm @@ -156,8 +156,6 @@ sub split_groups { } close( $outfile_handle ); } - - remove_tree( $self->_tmp_dir ) unless ( $self->dont_delete ); } sub _set_genes_to_groups {