Skip to content

Commit

Permalink
Merge pull request #103 from andrewjpage/stop_deep_recursion
Browse files Browse the repository at this point in the history
Stop deep recursion
  • Loading branch information
andrewjpage committed Mar 26, 2015
2 parents 32b4d5d + f9d9b2e commit ec9c313
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 100 deletions.
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = Bio-Roary
version = 2.0.4
version = 2.0.5
author = Andrew J. Page <[email protected]>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
Expand Down
4 changes: 2 additions & 2 deletions lib/Bio/Roary/QC/ShredAssemblies.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ with 'Bio::Roary::JobRunner::Role';

has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'read_size' => ( is => 'rw', isa => 'Int', default => 150 );
has 'max_reads_per_sequence' => ( is => 'rw', isa => 'Int',default => 10 );
has 'max_sequences' => ( is => 'rw', isa => 'Int', default => 10 );
has 'max_reads_per_sequence' => ( is => 'rw', isa => 'Int',default => 20 );
has 'max_sequences' => ( is => 'rw', isa => 'Int', default => 20 );
has 'output_directory' => ( is => 'rw', isa => 'Str', lazy_build => 1 );

sub _build_output_directory {
Expand Down
102 changes: 5 additions & 97 deletions lib/Bio/Roary/SplitGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'outfile' => ( is => 'ro', isa => 'Str', required => 1 );
has 'iterations' => ( is => 'ro', isa => 'Int', default => 10 );
has 'dont_delete' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'max_recursion' => ( is => 'ro', isa => 'Int', default => 5 );

has '_outfile_handle' => ( is => 'ro', lazy_build => 1 );
has '_neighbourhood_size' => ( is => 'ro', isa => 'Int', default => 5 );
Expand Down Expand Up @@ -87,7 +88,7 @@ sub split_groups {
my @group = split( /\s+/, $line );

if( $self->_contains_paralogs( \@group ) ){
my @true_orthologs = @{ $self->_true_orthologs( \@group ) };
my @true_orthologs = @{ $self->_true_orthologs( \@group,$self->max_recursion ) };
push( @newgroups, @true_orthologs);
}
else {
Expand All @@ -105,52 +106,6 @@ sub split_groups {
close( $outfile_handle );
}

# Legacy iterative group splitter.
#
# Runs at most $self->iterations passes. Each pass reads the groups file
# returned by _get_files_for_iteration, replaces every group that contains
# paralogs with the groups returned by _true_orthologs_old, and writes the
# resulting groups (one tab-separated group per line) to that pass's output
# file. The first pass that finds no paralogs moves its input file to
# $self->outfile and stops. The temporary directory is removed afterwards
# unless $self->dont_delete is set.
#
# Dies with the offending path and the OS error if a groups file cannot be
# opened, written, or moved, instead of silently carrying on with no data.
#
# NOTE(review): if every pass still finds paralogs, nothing in this sub moves
# a result to $self->outfile -- confirm whether _get_files_for_iteration makes
# the final pass write there directly.
sub split_groups_old {
    my ( $self ) = @_;

    $self->_make_tmp_dir;

    # iteratively
    for my $x ( 0 .. ( $self->iterations - 1 ) ) {
        my ( $in_groups, $out_groups ) = $self->_get_files_for_iteration( $x );

        # read in groups, check paralogs and split
        my @newgroups;
        my $any_paralogs = 0;
        open( my $group_handle, '<', $in_groups )
          or die "Cannot open groups file '$in_groups' for reading: $!";
        while ( my $line = <$group_handle> ) {
            my @group = split( /\s+/, $line );

            if ( $self->_contains_paralogs( \@group ) ) {
                # NOTE(review): this is re-run for every paralogous group of
                # the same input file; presumably it could be done once per
                # pass -- verify against _set_genes_to_groups before hoisting.
                $self->_set_genes_to_groups( $in_groups );
                my @true_orthologs = @{ $self->_true_orthologs_old( \@group ) };
                push( @newgroups, @true_orthologs );
                $any_paralogs = 1;
            }
            else {
                push( @newgroups, \@group );
            }
        }
        close( $group_handle );

        # check if next iteration required, move output if not
        unless ( $any_paralogs ) {
            # input file will be the same as new output file if no splitting
            # has been performed
            move( $in_groups, $self->outfile )
              or die "Cannot move '$in_groups' to '" . $self->outfile . "': $!";
            last;
        }

        # write split groups to file
        open( my $outfile_handle, '>', $out_groups )
          or die "Cannot open groups file '$out_groups' for writing: $!";
        for my $g ( @newgroups ) {
            my $group_str = join( "\t", @{ $g } ) . "\n";
            print {$outfile_handle} $group_str;
        }

        # buffered write errors (e.g. disk full) only surface at close
        close( $outfile_handle )
          or die "Cannot finish writing groups file '$out_groups': $!";
    }

    remove_tree( $self->_tmp_dir ) unless ( $self->dont_delete );
}

sub _set_genes_to_groups {
my ( $self, $groupfile ) = @_;

Expand Down Expand Up @@ -221,7 +176,7 @@ sub _find_paralogs {
}

sub _true_orthologs {
my ( $self, $gs ) = @_;
my ( $self, $gs, $max_recursion ) = @_;

# first, create CGN hash for group
my %cgns;
Expand Down Expand Up @@ -262,8 +217,8 @@ sub _true_orthologs {

my @new_groups;
for my $g ( @split_groups ){
if( $self->_contains_paralogs( $g ) ){
my @true_orthologs = @{ $self->_true_orthologs( $g ) };
if( $self->_contains_paralogs( $g ) && $max_recursion > 0){
my @true_orthologs = @{ $self->_true_orthologs( $g,$max_recursion - 1) };
push( @new_groups, @true_orthologs);
}
else {
Expand All @@ -284,53 +239,6 @@ sub _true_orthologs {
return \@new_groups;
}

# Legacy, non-recursive split of one group around its paralogs.
#
# Arguments:
#   $group - array ref of gene identifiers forming one group.
#
# Returns an array ref of array refs. There is one new group per paralog
# reported by _find_paralogs, each starting with that paralog, in the order
# _find_paralogs returned them. Every other member of $group is appended to
# the group at the index returned by _closest_cgn. A trailing "leftovers"
# group is kept only if at least one gene landed in it. When
# $self->_do_sorting is true, the members of each returned group are sorted
# with Perl's default string sort.
sub _true_orthologs_old {
    my ( $self, $group ) = @_;

    # first, create CGN hash for group
    my %cgns;
    for my $g ( @{ $group } ) {
        $cgns{$g} = $self->_parse_gene_neighbourhood( $g );
    }

    # finding paralogs in the group; each paralog seeds its own new group and
    # contributes its CGN as a clustering target at the same index
    my @paralogs = @{ $self->_find_paralogs( $group ) };
    my ( @paralog_cgns, @new_groups, %is_paralog );
    for my $p ( @paralogs ) {
        push( @paralog_cgns, $cgns{$p} );
        push( @new_groups, [ $p ] );
        $is_paralog{$p} = 1;
    }
    push( @new_groups, [] ); # extra "leftovers" array to gather genes that don't share CGN with anything

    # cluster other members of the group to their closest match
    for my $g ( @{ $group } ) {
        # hash lookup: same string-equality membership test as scanning
        # @paralogs, without rescanning the list for every member
        next if $is_paralog{$g};

        # presumably _closest_cgn returns the leftovers index when nothing
        # matches -- verify against its definition
        my $closest = $self->_closest_cgn( $cgns{$g}, \@paralog_cgns );
        push( @{ $new_groups[$closest] }, $g );
    }

    # check for "leftovers", remove if absent
    pop @new_groups unless @{ $new_groups[-1] };

    # sort
    if ( $self->_do_sorting ) {
        my @sorted_new_groups = map { [ sort @{ $_ } ] } @new_groups;
        return \@sorted_new_groups;
    }

    return \@new_groups;
}

sub _closest_cgn {
my ( $self, $cgn, $p_cgns ) = @_;

Expand Down

0 comments on commit ec9c313

Please sign in to comment.