Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Cleanup files #126

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = Bio-Roary
version = 2.2.2
version = 2.2.3
author = Andrew J. Page <[email protected]>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/Roary/External/Mcl.pm
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ sub _command_to_run {
" ",
(
$self->mcxdeblast_exec, '-m9', '--score='.$self->_score,
'--line-mode=abc', $self->blast_results,
'--line-mode=abc', $self->blast_results, '2> /dev/null',
'|', $self->mcl_exec, '-', '--abc',
'-I', $self->_inflation_value, '-o', $self->output_file,
$self->_logging
Expand Down
5 changes: 5 additions & 0 deletions lib/Bio/Roary/PostAnalysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,11 @@ sub _delete_intermediate_files
my ($self) = @_;
return if($self->dont_delete_files == 1);

for my $fasta_file (@{$self->fasta_files})
{
unlink($fasta_file) if(-e $fasta_file);
}

unlink($self->_output_mcl_filename) ;
unlink($self->_output_inflate_clusters_filename) ;
unlink($self->_output_group_labels_filename) ;
Expand Down
29 changes: 8 additions & 21 deletions lib/Bio/Roary/SplitGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ has 'outfile' => ( is => 'ro', isa => 'Str', required => 1 );
has 'iterations' => ( is => 'ro', isa => 'Int', default => 5 );
has 'dont_delete' => ( is => 'ro', isa => 'Bool', default => 0 );

has '_outfile_handle' => ( is => 'ro', lazy_build => 1 );
has '_neighbourhood_size' => ( is => 'ro', isa => 'Int', default => 5 );

has '_group_filelist' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 );
has '_tmp_dir' => ( is => 'ro', isa => 'Str', default => 'split_groups' );
has '_tmp_dir_object' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
has '_tmp_dir' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__tmp_dir' );

has '_analyse_groups_obj' => ( is => 'ro', lazy_build => 1 );
has '_genes_to_files' => ( is => 'ro', lazy_build => 1 );
Expand All @@ -45,11 +45,9 @@ has '_gene_files_temp_dir_obj' =>

has '_do_sorting' => ( is => 'rw', isa => 'Bool', default => 0 ); # set to 1 for testing only

sub _build__outfile_handle {
my ( $self ) = @_;

open( my $fh, '>', $self->outfile );
return $fh;
# Builder for the lazy '_tmp_dir' attribute.
#
# Returns the path (as reported by dirname()) of the temporary directory
# object held in '_tmp_dir_object'.
sub _build__tmp_dir {
    my $self = shift;

    my $tmp_dir_object = $self->_tmp_dir_object;
    return $tmp_dir_object->dirname();
}

sub _build__analyse_groups_obj {
Expand Down Expand Up @@ -79,14 +77,6 @@ sub _build__group_filelist {
return \@filelist;
}

# Ensure the directory named by the '_tmp_dir' attribute exists.
#
# Nothing is done when the path is already present on disk; otherwise the
# directory (and any missing parents) is created with make_path, and the
# sub dies if make_path reports that nothing was created.
sub _make_tmp_dir {
    my $self = shift;

    my $tmp_path = $self->_tmp_dir;
    return if -e $tmp_path;

    make_path($tmp_path) or die "Cannot make dir: $tmp_path\n";
}

sub _build__genes_to_neighbourhood
{
my ( $self ) = @_;
Expand Down Expand Up @@ -119,8 +109,6 @@ sub _build__genes_to_neighbourhood
sub split_groups {
my ( $self ) = @_;

$self->_make_tmp_dir;

# iteratively
for my $x ( 0..($self->iterations - 1) ){
my ( $in_groups, $out_groups ) = $self->_get_files_for_iteration( $x );
Expand Down Expand Up @@ -168,24 +156,23 @@ sub split_groups {
}
close( $outfile_handle );
}

remove_tree( $self->_tmp_dir ) unless ( $self->dont_delete );
}

# Build the gene -> group-index lookup from a groups file and store it
# through the '_genes_to_groups' accessor.
#
# Arguments:
#   $groupfile - path to a text file holding one group per line, with the
#                gene names on a line separated by whitespace.
#
# Groups are numbered from 0 in the order their lines appear in the file.
# A gene that occurs on several lines keeps the index of the last such line.
#
# Dies if the file cannot be opened. Returns whatever the
# '_genes_to_groups' accessor returns.
sub _set_genes_to_groups {
    my ( $self, $groupfile ) = @_;

    my %genes2groups;
    my $group_index = 0;

    # Lexical handle only: the bareword GFH variant is package-global and
    # would stay open (and shared) across calls.
    open( my $gfh, '<', $groupfile )
      or die "Cannot open group file: $groupfile: $!\n";
    while ( my $line = <$gfh> ) {
        chomp $line;
        for my $gene ( split( /\s+/, $line ) ) {
            $genes2groups{$gene} = $group_index;
        }
        $group_index++;
    }
    close($gfh);

    $self->_genes_to_groups( \%genes2groups );
}

Expand Down
56 changes: 41 additions & 15 deletions t/Bio/Roary/CommandLine/Roary.t
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,24 @@ cleanup_files();
);

mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] );

# Make sure faa files are cleaned up automatically
ok(!(-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is cleaned up');
ok(!(-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is cleaned up');
ok(!(-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is cleaned up');


%scripts_and_expected_files = (
'-j Local --dont_delete_files t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'empty_file', 't/data/empty_file' ],
);
mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [0,6,7,8,9] );
ok((-e 'query_1.gff.proteome.faa'),'Check protein query_1.gff.proteome.faa is not cleaned up');
ok((-e 'query_2.gff.proteome.faa'),'Check protein query_2.gff.proteome.faa is not cleaned up');
ok((-e 'query_5.gff.proteome.faa'),'Check protein query_5.gff.proteome.faa is not cleaned up');

cleanup_files();

SKIP:
{

Expand Down Expand Up @@ -89,32 +105,42 @@ done_testing();
# Delete the output and intermediate files this test script may leave in the
# current working directory, so runs do not see each other's leftovers.
#
# Takes no arguments and returns nothing. Names that do not exist on disk
# are simply skipped by unlink.
sub cleanup_files
{
    # remove_tree deletes the directory together with everything beneath it.
    remove_tree('pan_genome_sequences');

    # Every file name appears exactly once, in sorted order, so additions
    # are easy to place and duplicates are easy to spot.
    my @leftover_files = qw(
        _blast_results
        _clustered
        _clustered.bak.clstr
        _clustered.clstr
        _combined_files
        _combined_files.groups
        _fasta_files
        _gff_files
        _inflated_mcl_groups
        _inflated_unsplit_mcl_groups
        _labeled_mcl_groups
        _uninflated_mcl_groups
        accessory.header.embl
        accessory.header.tab
        accessory.tab
        blast_identity_frequency.Rtab
        clustered_proteins
        core_accessory.header.embl
        core_accessory.header.tab
        core_accessory.tab
        core_gene_alignment.aln
        database_masking.asnb
        example_1.faa.tmp.filtered.fa
        example_2.faa.tmp.filtered.fa
        example_3.faa.tmp.filtered.fa
        gene_presence_absence.csv
        number_of_conserved_genes.Rtab
        number_of_genes_in_pan_genome.Rtab
        number_of_new_genes.Rtab
        number_of_unique_genes.Rtab
        pan_genome.fa
        query_1.gff.proteome.faa
        query_2.gff.proteome.faa
        query_3.gff.proteome.faa
        query_5.gff.proteome.faa
        real_data_1.gff.proteome.faa
        real_data_2.gff.proteome.faa
    );

    # unlink accepts a list and ignores entries that are not present.
    unlink(@leftover_files);

    return;
}
2 changes: 1 addition & 1 deletion t/Bio/Roary/External/Mcl.t
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ok(
is(
$obj->_command_to_run,
$cwd
. '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results | '
. '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results 2> /dev/null | '
. $cwd
. '/t/bin/dummy_mcl - --abc -I 1.5 -o output.groups > /dev/null 2>&1',
'Command constructed as expected'
Expand Down