diff --git a/dist.ini b/dist.ini index 0762d92..5309b93 100644 --- a/dist.ini +++ b/dist.ini @@ -1,5 +1,5 @@ name = Bio-Roary -version = 2.2.2 +version = 2.2.3 author = Andrew J. Page license = GPL_3 copyright_holder = Wellcome Trust Sanger Institute diff --git a/lib/Bio/Roary/External/Mcl.pm b/lib/Bio/Roary/External/Mcl.pm index f3aa353..265d469 100644 --- a/lib/Bio/Roary/External/Mcl.pm +++ b/lib/Bio/Roary/External/Mcl.pm @@ -64,7 +64,7 @@ sub _command_to_run { " ", ( $self->mcxdeblast_exec, '-m9', '--score='.$self->_score, - '--line-mode=abc', $self->blast_results, + '--line-mode=abc', $self->blast_results, '2> /dev/null', '|', $self->mcl_exec, '-', '--abc', '-I', $self->_inflation_value, '-o', $self->output_file, $self->_logging diff --git a/lib/Bio/Roary/PostAnalysis.pm b/lib/Bio/Roary/PostAnalysis.pm index efcc868..f3b829e 100644 --- a/lib/Bio/Roary/PostAnalysis.pm +++ b/lib/Bio/Roary/PostAnalysis.pm @@ -218,6 +218,11 @@ sub _delete_intermediate_files my ($self) = @_; return if($self->dont_delete_files == 1); + for my $fasta_file (@{$self->fasta_files}) + { + unlink($fasta_file) if(-e $fasta_file); + } + unlink($self->_output_mcl_filename) ; unlink($self->_output_inflate_clusters_filename) ; unlink($self->_output_group_labels_filename) ; diff --git a/lib/Bio/Roary/SplitGroups.pm b/lib/Bio/Roary/SplitGroups.pm index de56cec..d039c18 100644 --- a/lib/Bio/Roary/SplitGroups.pm +++ b/lib/Bio/Roary/SplitGroups.pm @@ -24,11 +24,11 @@ has 'outfile' => ( is => 'ro', isa => 'Str', required => 1 ); has 'iterations' => ( is => 'ro', isa => 'Int', default => 5 ); has 'dont_delete' => ( is => 'ro', isa => 'Bool', default => 0 ); -has '_outfile_handle' => ( is => 'ro', lazy_build => 1 ); has '_neighbourhood_size' => ( is => 'ro', isa => 'Int', default => 5 ); has '_group_filelist' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 ); -has '_tmp_dir' => ( is => 'ro', isa => 'Str', default => 'split_groups' ); +has '_tmp_dir_object' => ( is => 'ro', isa => 'File::Temp::Dir', 
default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } ); +has '_tmp_dir' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__tmp_dir' ); has '_analyse_groups_obj' => ( is => 'ro', lazy_build => 1 ); has '_genes_to_files' => ( is => 'ro', lazy_build => 1 ); @@ -45,11 +45,9 @@ has '_gene_files_temp_dir_obj' => has '_do_sorting' => ( is => 'rw', isa => 'Bool', default => 0 ); # set to 1 for testing only -sub _build__outfile_handle { - my ( $self ) = @_; - - open( my $fh, '>', $self->outfile ); - return $fh; +sub _build__tmp_dir { + my ($self) = @_; + return $self->_tmp_dir_object->dirname(); } sub _build__analyse_groups_obj { @@ -79,14 +77,6 @@ sub _build__group_filelist { return \@filelist; } -sub _make_tmp_dir { - my ( $self ) = @_; - my $dir = $self->_tmp_dir; - unless ( -e $dir ) { - make_path($dir) or die "Cannot make dir: $dir\n" ; - } -} - sub _build__genes_to_neighbourhood { my ( $self ) = @_; @@ -119,8 +109,6 @@ sub _build__genes_to_neighbourhood sub split_groups { my ( $self ) = @_; - $self->_make_tmp_dir; - # iteratively for my $x ( 0..($self->iterations - 1) ){ my ( $in_groups, $out_groups ) = $self->_get_files_for_iteration( $x ); @@ -168,8 +156,6 @@ sub split_groups { } close( $outfile_handle ); } - - remove_tree( $self->_tmp_dir ) unless ( $self->dont_delete ); } sub _set_genes_to_groups { @@ -177,8 +163,8 @@ sub _set_genes_to_groups { my %genes2groups; my $c = 0; - open( GFH, '<', $groupfile ); - while( my $line = <GFH> ){ + open( my $gfh, '<', $groupfile ); + while( my $line = <$gfh> ){ chomp $line; my @genes = split( /\s+/, $line ); for my $g ( @genes ){ @@ -186,6 +172,7 @@ sub _set_genes_to_groups { } $c++; } + close($gfh); $self->_genes_to_groups( \%genes2groups ); } diff --git a/t/Bio/Roary/CommandLine/Roary.t b/t/Bio/Roary/CommandLine/Roary.t index ef4dd10..88d0570 100755 --- a/t/Bio/Roary/CommandLine/Roary.t +++ b/t/Bio/Roary/CommandLine/Roary.t @@ -53,8 +53,24 @@ cleanup_files(); ); 
mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] ); + +# Make sure faa files are cleaned up automatically +ok(!(-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is cleaned up'); +ok(!(-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is cleaned up'); +ok(!(-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is cleaned up'); + + +%scripts_and_expected_files = ( +'-j Local --dont_delete_files t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' => + [ 'empty_file', 't/data/empty_file' ], + ); +mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] ); +ok((-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is not cleaned up'); +ok((-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is not cleaned up'); +ok((-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is not cleaned up'); + cleanup_files(); - + SKIP: { @@ -89,32 +105,42 @@ done_testing(); sub cleanup_files { remove_tree('pan_genome_sequences'); + unlink('_blast_results'); + unlink('_clustered'); + unlink('_clustered.bak.clstr'); + unlink('_clustered.clstr'); + unlink('_combined_files'); + unlink('_combined_files.groups'); + unlink('_fasta_files'); + unlink('_gff_files'); + unlink('_inflated_mcl_groups'); + unlink('_inflated_unsplit_mcl_groups'); + unlink('_labeled_mcl_groups'); + unlink('_uninflated_mcl_groups'); + unlink('accessory.header.embl'); + unlink('accessory.header.tab'); + unlink('accessory.tab'); + unlink('blast_identity_frequency.Rtab'); unlink('clustered_proteins'); + unlink('core_accessory.header.embl'); + unlink('core_accessory.header.tab'); + unlink('core_accessory.tab'); + unlink('core_gene_alignment.aln'); unlink('database_masking.asnb'); unlink('example_1.faa.tmp.filtered.fa'); unlink('example_2.faa.tmp.filtered.fa'); unlink('example_3.faa.tmp.filtered.fa'); 
unlink('gene_presence_absence.csv'); - unlink('query_1.gff.proteome.faa'); - unlink('query_2.gff.proteome.faa'); - unlink('query_3.gff.proteome.faa'); - unlink('_clustered'); - unlink('_clustered.bak.clstr'); - unlink('pan_genome.fa'); - unlink('core_accessory.header.tab'); - unlink('accessory.header.tab'); - unlink('accessory.tab'); - unlink('core_accessory.tab'); unlink('number_of_conserved_genes.Rtab'); unlink('number_of_genes_in_pan_genome.Rtab'); unlink('number_of_new_genes.Rtab'); unlink('number_of_unique_genes.Rtab'); + unlink('pan_genome.fa'); + unlink('query_1.gff.proteome.faa'); + unlink('query_2.gff.proteome.faa'); + unlink('query_3.gff.proteome.faa'); unlink('query_5.gff.proteome.faa'); - unlink('core_gene_alignment.aln'); - unlink('blast_identity_frequency.Rtab'); unlink('real_data_1.gff.proteome.faa'); unlink('real_data_2.gff.proteome.faa'); - unlink('accessory.header.embl'); - unlink('core_accessory.header.embl'); } \ No newline at end of file diff --git a/t/Bio/Roary/External/Mcl.t b/t/Bio/Roary/External/Mcl.t index 58af5f5..b8f1a9c 100644 --- a/t/Bio/Roary/External/Mcl.t +++ b/t/Bio/Roary/External/Mcl.t @@ -28,7 +28,7 @@ ok( is( $obj->_command_to_run, $cwd - . '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results | ' + . '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results 2> /dev/null | ' . $cwd . '/t/bin/dummy_mcl - --abc -I 1.5 -o output.groups > /dev/null 2>&1', 'Command constructed as expected'