diff --git a/lib/CXGN/Phylo/IdTaxonMap.pm b/lib/CXGN/Phylo/IdTaxonMap.pm
new file mode 100644
index 00000000..0c6cc2df
--- /dev/null
+++ b/lib/CXGN/Phylo/IdTaxonMap.pm
@@ -0,0 +1,70 @@
+package CXGN::Phylo::IdTaxonMap;
+
+use strict;
+use List::Util qw ( min max sum );
+
+my %default_map =
+ (
+ '^AT' => 'arabidopsis',
+ '^Bradi' => 'brachypodium',
+ '^(?:X_)?\d{5}[.]m\d{6}' => 'castorbean', # can have X_ prefix or not
+ '^GSVIV' => 'grape',
+ '^(?:GRMZM|AC\d{6})' => 'maize',
+ '^IMGA[|_](?:Medtr|AC|CU)' => 'medicago',
+ '^evm' => 'papaya',
+ '^POPTR' => 'poplar',
+ '^LOC_Os' => 'rice',
+ '^jgi_Selmo' => 'selaginella',
+ '^Sb' => 'sorghum',
+ '^Glyma' => 'soybean',
+ '^Solyc' => 'tomato'
+ );
+
+
+sub new {
+ my $class = shift;
+ my $arg = shift; # hash ref, keys are regular expressions, values are corresponding taxon names.
+ my $args= {};
+ my $self = bless $args, $class;
+
+ my %map = ();
+ foreach (keys %default_map) {
+ $map{$_} = $default_map{$_};
+ }
+
+ foreach (keys %$arg) {
+ $map{$_} = $arg->{$_};
+ }
+
+ $self->{map} = \%map;
+ return $self;
+}
+
+sub get_map{
+ my $self = shift;
+ return $self->{map};
+}
+
+
+sub add_idregex_taxonname{
+ my $self = shift;
+ my $idregex = shift;
+ my $taxonname = shift;
+ $self->{map}->{$idregex} = $taxonname;
+}
+
+
+sub id_to_taxonname{
+ my $self = shift;
+ my $id = shift;
+
+ my $map = $self->get_map();
+ foreach (keys %$map) {
+# print "id regex: $_\n";
+ return $map->{$_} if($id =~ /$_/);
+ }
+ warn "No taxon name found for id: $id\n";
+ return;
+}
+
+1;
diff --git a/lib/CXGN/Phylo/Layout.pm b/lib/CXGN/Phylo/Layout.pm
index 76ae4ae0..be508be6 100644
--- a/lib/CXGN/Phylo/Layout.pm
+++ b/lib/CXGN/Phylo/Layout.pm
@@ -357,8 +357,7 @@ sub _layout_vertical {
my $total_leaves = $self->get_tree()->get_leaf_count();
my $image_height = $self->get_image_height();
if ($total_leaves < 1) { return; }
- my $vertical_gap = ($image_height - $self->get_top_margin() - $self->get_bottom_margin()) / ($total_leaves);
-
+ my $vertical_gap = ($total_leaves > 1)?($image_height - $self->get_top_margin() - $self->get_bottom_margin()) / ($total_leaves-1): 0;
# the leaf nodes should be easy.
# traverse the tree, find all the leaves, and set the vertical
# coordinate in the leaves to an increasing value with step $vertical_gap.
diff --git a/lib/CXGN/Phylo/Mrbayes.pm b/lib/CXGN/Phylo/Mrbayes.pm
new file mode 100644
index 00000000..a98ac963
--- /dev/null
+++ b/lib/CXGN/Phylo/Mrbayes.pm
@@ -0,0 +1,671 @@
+package Mrbayes;
+use strict;
+use List::Util qw ( min max sum );
+#use TlyUtil qw ( Kolmogorov_Smirnov_D );
+
+# this is an object to facilitate running MrBayes (Bayesian phylogeny
+# inference program). The main functionality it adds is a relatively
+# easy way to control the criteria for deciding when to end a run.
+
+my $default_chunk_size = 2000;
+
+sub new {
+ my $class = shift;
+ my $arg = shift; # a hashref for setting various options
+ my $default_arguments = {'alignment_nex_filename' => undef,
+ 'file_basename' => undef,
+ 'seed' => undef,
+ 'swapseed' => undef,
+ 'n_runs' => 2,
+ 'n_temperatures' => 4,
+ 'temperature_gap' => 0.25,
+ 'chunk_size' => $default_chunk_size,
+ 'print_freq' => undef,
+ 'sample_freq' => 20,
+ 'burnin_frac' => 0.1,
+ 'diag_freq' => undef,
+ 'converged_chunks_required' => 10,
+
+ 'fixed_pinvar' => undef, # undef -> leaves default of uniform(0,1) in effect
+ # convergence criteria
+ 'splits_min_hits' => 25,
+ 'splits_max_ok_stddev' => 0.03,
+ 'splits_max_ok_avg_stddev' => 0.01,
+ 'modelparam_min_ok_ESS' => 250,
+ 'modelparam_max_ok_PSRF' => 1.02,
+ 'modelparam_max_ok_KSD' => 0.2,
+ 'ngens_run' => 0,
+ 'id_species_map' => {}
+ };
+ my $self = bless $default_arguments, $class;
+
+ foreach my $option (keys %$arg) {
+ warn "Unknown option: $option in Mrbayes constructor.\n" if(!exists $self->{$option});
+ $self->{$option} = $arg->{$option};
+ }
+ $self->{print_freq} = $self->{chunk_size} if(!defined $self->{print_freq});
+ $self->{diag_freq} = $self->{chunk_size} if(!defined $self->{diag_freq});
+ # print "print, diag freq: ", $self->{print_freq}, " ", $self->{diag_freq}, "\n";
+
+ my $alignment_nex_filename = $self->{alignment_nex_filename};
+ # if(defined $self->{file_basename}){
+ # my $file_basename;
+ if (!defined $self->{file_basename}) {
+ my $file_basename = $alignment_nex_filename;
+ $file_basename =~ s/[.]nex$//; # delete .nex ending
+ $self->{file_basename} = $file_basename;
+ }
+ my $n_runs = $self->{n_runs};
+ my $burnin_frac = $self->{burnin_frac};
+ my $n_temperatures = $self->{n_temperatures};
+ my $temperature_gap = $self->{temperature_gap};
+ my $sample_freq = $self->{sample_freq};
+ my $print_freq = $self->{print_freq};
+ my $chunk_size = $self->{chunk_size};
+ my $fixed_pinvar = $self->{fixed_pinvar};
+ my $prset_pinvarpr = (defined $fixed_pinvar)? "prset pinvarpr=fixed($fixed_pinvar);\n" : '';
+
+ my $begin_piece =
+ "begin mrbayes;\n" .
+ "set autoclose=yes nowarn=yes;\n";
+ my $seed_piece = '';
+ if (defined $self->{seed}) {
+ my $seed = $self->{seed}; $seed_piece .= "set seed=$seed;\n";
+ }
+ if (defined $self->{swapseed}) {
+ my $swapseed = $self->{swapseed}; $seed_piece .= "set swapseed=$swapseed;\n";
+ }
+ my $middle_piece = "execute $alignment_nex_filename;\n" .
+ "set precision=6;\n" .
+ "lset rates=invgamma;\n" .
+ "prset aamodelpr=fixed(wag);\n" .
+ "$prset_pinvarpr" .
+ # "prset pinvarpr=fixed(0.15);\n" .
+ "mcmcp minpartfreq=0.02;\n" . # bipartitions with freq. less than this are not used in the diagnostics (default is 0.10)
+ "mcmcp allchains=yes;\n" .
+ "mcmcp burninfrac=$burnin_frac;\n" .
+ "mcmcp nchains=$n_temperatures;\n" .
+ "mcmcp nruns=$n_runs;\n" .
+ "mcmcp temp=$temperature_gap;\n" .
+ "mcmcp samplefreq=$sample_freq;\n" .
+ "mcmcp printfreq=$print_freq;\n" .
+ # "mcmcp filename=$file_basename;\n" .
+ "mcmcp checkpoint=yes checkfreq=$chunk_size;\n";
+ my $end_piece = "sump;\n" . "sumt;\n" . "end;\n";
+
+ $self->{mrbayes_block1} =
+ $begin_piece . $seed_piece .
+ $middle_piece . "mcmc ngen=$chunk_size;\n" .
+ $end_piece;
+
+ $self->{mrbayes_block2} =
+ $begin_piece . $middle_piece .
+ "mcmc append=yes ngen=$chunk_size;\n" .
+ $end_piece;
+
+ $self->setup_id_species_map();
+
+ return $self;
+}
+
+sub run{
+ my $self = shift;
+
+ my $chunk_size = $self->{chunk_size};
+ my $ngen = $chunk_size;
+ my $mrbayes_block1 = $self->{mrbayes_block1};
+
+ open my $fh, ">tmp_mrb1.nex";
+ print $fh "$mrbayes_block1";
+ close $fh;
+
+ my $mb_output_string = `mb tmp_mrb1.nex`;
+ $self->{ngens_run} = $ngen;
+
+ my $mc3swap_filename = $self->{file_basename} . ".mc3swap";
+ open my $fhmc3, ">$mc3swap_filename";
+ print $fhmc3 "$ngen ", $self->extract_swap_info($mb_output_string), "\n";
+
+ open $fh, ">mb1.stdout";
+ print $fh "$mb_output_string \n";
+ close $fh;
+
+ my ($converged, $conv_string) = $self->test_convergence($self->{file_basename});
+ my $converge_count += $converged;
+ open my $fhc, ">MB.converge";
+ print $fhc "$ngen $converge_count $conv_string\n";
+
+ foreach (my $i=1; $i>0; $i++) { # infinite loop
+ $ngen += $chunk_size;
+ my $mrbayes_block2 = $self->{mrbayes_block2};
+ $mrbayes_block2 =~ s/ngen=\d+;/ngen=$ngen;/; # subst in the new ngen
+
+ open $fh, ">tmp_mrb2.nex";
+ print $fh "$mrbayes_block2";
+ close $fh;
+
+ $mb_output_string = `mb tmp_mrb2.nex`;
+
+ $self->{ngens_run} = $ngen;
+
+ print $fhmc3 "$ngen ", $self->extract_swap_info($mb_output_string), "\n";
+ open $fh, ">mb2.stdout";
+ print $fh "$mb_output_string \n";
+ close $fh;
+
+ ($converged, $conv_string) = $self->test_convergence($self->{file_basename});
+ $converge_count += $converged;
+ print $fhc "$ngen $converge_count $conv_string\n";
+ last if($converge_count >= $self->{converged_chunks_required});
+ }
+ close $fhmc3; close $fhc;
+ return;
+}
+
+
+sub splits_convergence{
+ my $self = shift;
+ my $file_basename = shift; # e.g. fam9877
+ my $min_hits = $self->{splits_min_hits}; # ignore splits with fewer hits than this.
+ my $max_ok_stddev = $self->{splits_max_ok_stddev}; # convergence is 'ok' for a split if stddev < this.
+ my $max_ok_avg_stddev = $self->{splits_max_ok_avg_stddev}; # convergence is 'ok' for a split if stddev < this.
+ #print "in splits convergence file basename: ", $file_basename, "\n"; #exit;
+
+ my $filename = $file_basename . ".nex.tstat";
+ # print "filename: $filename\n";
+ open my $fh, "<$filename";
+ my @lines = <$fh>;
+
+ my ($avg_stddev, $count, $bad_count) = (0, 0, 0);
+ foreach (@lines) {
+ # print;
+ next unless(/^\s*\d/); # skip if first non-whitespace is not numeral.
+ my @cols = split(" ", $_);
+ my $hits = $cols[1]; my $stddev = $cols[3];
+ # print "$hits, $min_hits, $stddev\n";
+ last if($hits < $min_hits);
+ $count++;
+ $avg_stddev += $stddev;
+ if ($stddev > $max_ok_stddev) {
+ $bad_count++;
+ next;
+ }
+ }
+ $avg_stddev = ($count == 0)? 100 : $avg_stddev/$count;
+ my $splits_converged = ($bad_count == 0 and $avg_stddev < $max_ok_avg_stddev);
+ return ($splits_converged, $count, $bad_count, $avg_stddev);
+}
+
+
+sub modelparam_convergence{ # look at numbers in *.pstat file
+ # to test convergence
+ my $self = shift;
+ my $file_basename = shift;
+ my $min_ok_ESS = $self->{modelparam_min_ok_ESS};
+ my $max_ok_PSRF = $self->{modelparam_max_ok_PSRF};
+ my $max_ok_KSD = $self->{modelparam_max_ok_KSD};
+ my $string = '';
+ my $ngens_skip = int($self->{burnin_frac} * $self->{ngens_run});
+
+ open my $fh, "<$file_basename.nex.pstat";
+ my @lines = <$fh>;
+ close $fh;
+ my $discard = shift @lines;
+ my $count_param_lines = 0;
+ my $KSD_datacol = 1;
+ my $LL_KSD = $self->KSDmax($ngens_skip, 1);
+ my $n_bad = ($LL_KSD <= $max_ok_KSD)? 0 : 1;
+ my @KSDmaxes = ($LL_KSD);
+ # $n_bad++ if($LL_KSD > $self->{modelparam_max_ok_KSD}); # require LogL just to pass KSD test
+ foreach (@lines) {
+ my @cols = split(" ", $_);
+ my ($avgESS, $PSRF) = @cols[7, 8]; # col 6 is the min ESS among the runs, col 7 is avg.
+ next unless($avgESS =~ /^\d*[.]?\d+/ and $PSRF =~ /^\d*[.]?\d+/);
+ $KSD_datacol++; # 2,3,4,... the params in pstat file are in cols 2,3,4,... in *.run?.p
+ my $KSDmax = $self->KSDmax($ngens_skip, $KSD_datacol);
+ push @KSDmaxes, $KSDmax;
+ $string .= "$avgESS $PSRF ";
+ if ($avgESS < $min_ok_ESS
+ or $PSRF > $max_ok_PSRF
+ or $KSDmax > $max_ok_KSD) {
+ $n_bad++;
+ }
+ }
+ $string .= join(" ", map sprintf("%5.3f", $_), @KSDmaxes); # join(" ", @KSDmaxes);
+ return ($n_bad, $string);
+}
+
+sub test_convergence{
+ my $self = shift;
+ my $file_basename = shift; # e.g. fam9877.nex
+
+ my ($splits_converged, $splits_count, $splits_bad_count, $splits_avg_stddev) =
+ $self->splits_convergence($file_basename);
+ my ($modelparam_n_bad, $modelparam_string) =
+ $self->modelparam_convergence($file_basename);
+ my $ngens_skip = int($self->{burnin_frac} * $self->{ngens_run});
+
+ my $conv_string = "$splits_count $splits_bad_count $splits_avg_stddev " .
+ " $modelparam_string $modelparam_n_bad ";
+
+ my $converged = ($splits_converged and $modelparam_n_bad == 0);
+
+ return ($converged? 1 : 0, $conv_string);
+}
+
+
+sub extract_swap_info{
+ my $self = shift;
+ my $mb_stdout_string = shift;
+ my @mb_stdout_lines = split("\n", $mb_stdout_string);
+ my $n_lines_to_extract = 0;
+ my $extract_next_n = 0;
+ my $n_runs = undef;
+ my $n_chains = undef;
+ my $out_string = '';
+ foreach (@mb_stdout_lines) {
+ if (/number of chains to (\d+)/) {
+ $n_chains = $1;
+ $n_lines_to_extract = $n_chains + 4;
+ last if(defined $n_runs);
+ } elsif (/number of runs to (\d+)/) {
+ $n_runs = $1;
+ last if(defined $n_chains);
+ }
+
+ }
+ my $run;
+ my %run_string = ();
+ foreach (@mb_stdout_lines) {
+ if (/Chain swap information for run (\d+)/) {
+ $run = $1;
+ $extract_next_n = $n_lines_to_extract;
+ }
+ $out_string .= "$_\n" if($extract_next_n > 0);
+ $extract_next_n--;
+ if ($extract_next_n == 0) {
+ $run_string{$run} = $out_string;
+ $out_string = '';
+ last if($run == $n_runs);
+ }
+ }
+ $out_string = '';
+
+ foreach (keys %run_string) {
+ #print "$_ ", $run_string{$_}, "\n";
+ my @lines = split("\n", $run_string{$_});
+ splice @lines, 0, 4;
+ # print join("\n", @lines);
+
+ my %ij_swap_pA = ();
+ my %ij_swap_tries = ();
+ foreach my $i (1..$n_chains) {
+ my $l = $lines[$i-1];
+ $l =~ s/^\s*\d+\s+[|]\s+//;
+ my @xs = split(" ", $l);
+ my $n_ntry = $i-1;
+ my $n_pA = $n_chains-$i;
+ foreach my $j (1..$n_ntry) {
+ # print "swap_tries key: [$i $j]\n";
+ $ij_swap_tries{"$i $j"} = shift @xs;
+ }
+ foreach (1..$n_pA) {
+ my $j = $_ + $i;
+ # print "swap_pA key: [$j $i]\n";
+ $ij_swap_pA{"$j $i"} = shift @xs;
+ }
+ } # loop over chains
+ my %ij_swap_accepts = ();
+ # my @sijs = sort {$a cmp $b} keys %ij_swap_tries;
+ # foreach (@sijs) {
+
+ foreach my $diff (1..$n_chains-1) {
+ foreach my $i (1..$n_chains-1) {
+ my $j = $i + $diff;
+
+ last if($j > $n_chains);
+ my $key = "$j $i";
+ # print "i,j: $i, $j, key: [$key] \n";
+ if (exists $ij_swap_pA{$key} and exists $ij_swap_tries{$key}) {
+ $ij_swap_accepts{$key} = $ij_swap_tries{$key} * $ij_swap_pA{$key};
+ $out_string .= int($ij_swap_accepts{$key}+0.5) . " " . $ij_swap_tries{$key} . " ";
+ } else {
+ warn "key $key present in neither ij_swap_tries nor ij_swap_pA.\n";
+ }
+ }
+ $out_string .= ' ';
+ }
+ $out_string .= ' ';
+ } # loop over runs
+ return $out_string;
+}
+
+
+sub KSDmax{ # This does all pairwise comparisons between runs
+ # for one of the params in the *.run?.p files
+ # and returns the largest Kolmogorov-Smirnov D
+ my $self = shift;
+
+ my $ngen_skip = shift || 0;
+ my $datacol = shift; # data column to use.
+ $datacol = 1 if(!defined $datacol);
+
+ my $bigneg = -1e300;
+
+ my $file_basename = $self->{alignment_nex_filename}; # e.g. fam9877
+ # store data in hashes
+ my @val_count_hrefs = ({}, {}); #
+ my @counts = (0, 0);
+ my @files = `ls $file_basename.run?.p`;
+ my $runs_to_analyze = scalar @files;
+ warn "in KSDmax. n_runs: ", $self->{n_runs}, " *.p files found: ", $runs_to_analyze, " should agree, using min of the two.\n" if($self->{n_runs} != $runs_to_analyze);
+ $runs_to_analyze = min($runs_to_analyze, $self->{n_runs});
+
+ foreach my $irun (1..$runs_to_analyze) {
+ my $i = $irun - 1;
+ my $filename = "$file_basename.run" . $irun . ".p";
+ open my $fh, "<$filename";
+ while (<$fh>) {
+ my @cols = split(" ", $_);
+ # skip non-numerical stuff.
+ next unless($cols[0] =~ /^\d+$/);
+ my ($ngens, $x) = @cols[0,$datacol];
+ next if($ngens < $ngen_skip);
+ $val_count_hrefs[$i]->{$x}++;
+ $counts[$i]++;
+ }
+ close $fh;
+ }
+
+ # get cumulative distributions:
+ my @val_cumeprob_hrefs = ();
+ foreach my $i (0..$runs_to_analyze-1) {
+ push @val_cumeprob_hrefs, cumulative_prob($val_count_hrefs[$i], $counts[$i]);
+ }
+
+ my @KSDs = (); # Kolmogorov-Smirnov D for each pairwise comparison between runs
+ foreach my $i (0..scalar @files - 2) {
+ foreach my $j ($i+1..scalar @files - 1) {
+ push @KSDs, Kolmogorov_Smirnov_D(@val_cumeprob_hrefs[$i,$j]);
+ }
+ }
+ return max(@KSDs);
+}
+
+
+
+sub retrieve_param_samples{
+ # read data from run?.p files
+ # store in
+ my $self = shift;
+ my $pattern = shift || $self->{alignment_nex_filename}; # e.g. fam9877.nex
+ my $p_files = `ls $pattern.run?.p`;
+ my @p_infiles = split(" ", $p_files);
+ my $n_runs_p = scalar @p_infiles;
+
+ # the following has one elem for each run, and it is
+ # a hash ref, with generation numbers as keys,
+ # parameter strings (logL, TL, alpha ...) as values
+ my @gen_param_hashes = ();
+ foreach (1..$n_runs_p) {
+ push @gen_param_hashes, {};
+ }
+ #my $p_run = 1;
+ foreach my $i_run_p (1..$n_runs_p) {
+ my $p_file = "$pattern.run$i_run_p.p";
+ open my $fhp, "<$p_file";
+
+ while (my $line = <$fhp>) {
+ chomp $line;
+ next unless($line =~ /^\s*(\d+)/);
+ # print "$line \n";
+ my @cols = split(" ", $line);
+ my $generations = shift @cols;
+ my $param_string = join(" ", @cols);
+ $gen_param_hashes[$i_run_p-1]->{$generations} = $param_string;
+ }
+ $i_run_p++;
+ }
+ return \@gen_param_hashes;
+}
+# end of reading in parameter values
+
+sub retrieve_topology_samples{
+ my $self = shift;
+my $pattern = shift || $self->{alignment_nex_filename}; # e.g. fam987?.nex
+
+ my $t_files = `ls $pattern.run?.t`;
+ my @t_infiles = split(" ", $t_files);
+ my $n_runs = scalar @t_infiles;
+ my @gen_ntopo_hashes = ();
+ foreach (1..$n_runs) {
+ push @gen_ntopo_hashes, {};
+ }
+my %newick_number_map = ();
+my %number_newick_map = ();
+ my $topology_count = 0;
+ foreach my $i_run (1..$n_runs) {
+ my $t_infile = "$pattern.run$i_run.t";
+ open my $fh, "<$t_infile";
+
+ # read trees in, remove branch lengths, store in array
+
+ while (my $line = <$fh>) {
+ chomp $line;
+ if ($line =~ s/tree gen[.](\d+) = .{4}\s+//) {
+ my $newick = $line;
+ my $generation = $1;
+ $newick =~ s/:[0-9e\-.]*(,|[)])/$1/g; # remove branch lengths
+ #print "[$newick]\n";
+ $newick =~ s/^\s+//;
+ $newick =~ s/;\s*//;
+ $newick = order_newick($newick);
+ # print $newick, "\n";
+ # exit;
+ if (!exists $newick_number_map{$newick}) {
+ $topology_count++;
+ $newick_number_map{$newick} = $topology_count; # 1,2,3,...
+ $number_newick_map{$topology_count} = $newick;
+ }
+ $gen_ntopo_hashes[$i_run-1]->{$generation} = $newick_number_map{$newick};
+ }
+ } # now $gen_ntopo_hashes[$i_run] is hash ref with generations as keys, and topology numbers as values.
+ }
+return (\@gen_ntopo_hashes, \%newick_number_map, \%number_newick_map);
+}
+
+
+sub retrieve_number_id_map{
+ my $self = shift;
+ my $pattern = shift || $self->{alignment_nex_filename}; # e.g. fam9877.nex
+
+ my $trprobs_file = "$pattern.trprobs";
+ open my $fh, "<$trprobs_file";
+
+ my %number_id_map = ();
+ while (my $line = <$fh>) {
+ last if($line =~ /^\s*tree\s+tree_/);
+ if ($line =~ /^\s*(\d+)\s+(\S+)/) {
+ my $number = $1;
+ my $id = $2;
+ $id =~ s/[,;]$//; # delete final comma
+ $number_id_map{$number} = $id;
+ }
+ }
+ return \%number_id_map;
+}
+
+
+sub count_topologies{
+my $self = shift;
+my $gen_ntopo_hrefs = shift;
+my %topo_count = (); # key: topology number, value: arrayref with counts in each run
+# e.g. $topo_count{13} = [3,5] means that topo 13 occurred 3 times in run1, 5 times in run 2.
+my $total_trees = 0;
+foreach my $i_run (1..scalar @$gen_ntopo_hrefs) {
+ my $gen_ntopo = $gen_ntopo_hrefs->[$i_run-1];
+ my $trees_read_in = scalar keys %{$gen_ntopo};
+ print "Run: $i_run. Trees read in: $trees_read_in\n";
+ # store trees from array in hash, skipping burn-in
+ my $n_burnin = int($self->{burnin_frac} * $trees_read_in);
+# print "trees read in: $trees_read_in. Post burn-in: ", $trees_read_in - $n_burnin, "\n";
+ my @sorted_generations = sort {$a <=> $b} keys %{$gen_ntopo};
+ foreach my $i_gen (@sorted_generations[$n_burnin..$trees_read_in-1]) {
+ my $topo_number = $gen_ntopo->{$i_gen};
+ if (!exists $topo_count{$topo_number}) {
+ my @zeroes = ((0) x scalar @$gen_ntopo_hrefs);
+ $topo_count{$topo_number} = \@zeroes;
+ }
+ $topo_count{$topo_number}->[$i_run-1]++;
+ $total_trees++;
+ }
+ $i_run++;
+}
+return (\%topo_count, $total_trees);
+}
+
+sub restore_ids_to_newick{
+ my $self = shift;
+ my $newick = shift;
+ my $number_id_map = shift;
+
+ foreach my $number (keys %$number_id_map) {
+ my $id = $number_id_map->{$number};
+$id .= '[species=' . $self->{id_species_map}->{$id} . ']';
+ $newick =~ s/([(,])$number([,)])/$1$id$2/;
+ }
+ return $newick;
+}
+
+sub setup_id_species_map{
+my $self = shift;
+my $file = $self->{alignment_nex_filename};
+#$file =~ s/[.]nex/.fasta/;
+open my $fh, "<$file";
+while (my $line = <$fh>){
+ # print $line;
+ next unless($line =~ /^(\S+)\[species=(\S+)\]/);
+ my $id = $1;
+my $species = $2;
+#print $line;
+ $self->{id_species_map}->{$id} = $species;
+ print "$id $species \n";
+}
+#exit;
+return;
+}
+
+my $bigneg = -1e300;
+
+# given a set of numbers (some of which may occur more than once), which
+# are stored as keys in a hash, with the values being how many times they occur
+# or more generally whatever weights you want, sort the keys and get the
+# cumulative distribution.
+sub cumulative_prob{
+ my $val_weight_href = shift; # hashref, key: numbers, values: weights.
+ my $sum_weights = shift;
+ my $val_cumeprob_href = { $bigneg => 0};
+ my $cume_prob = 0;
+ foreach (sort {$a <=> $b} keys %{$val_weight_href}) {
+ $cume_prob += $val_weight_href->{$_}/$sum_weights;
+ $val_cumeprob_href->{$_} = $cume_prob;
+ # print "$i $_ $cume_prob ", $val_count_hashes[$i]->{$_}, "\n";
+ }
+ # print "\n";
+ return $val_cumeprob_href;
+}
+
+
+sub Kolmogorov_Smirnov_D{
+# get the maximum difference between two empirical cumulative distributions.
+# Arguments are two hashrefs, each representing an empirical cumulative distribution.
+# Each key is a data point (a real number), and the corresponding hash value is
+# the proportion of data pts <= to it. So the largest value should be 1.
+ my $val_cumeprob1 = shift;
+ my $val_cumeprob2 = shift;
+ my @sorted_vals1 = sort {$a <=> $b} keys %{$val_cumeprob1};
+ my @sorted_vals2 = sort {$a <=> $b} keys %{$val_cumeprob2};
+
+ my ($i1, $i2) = (0, 0);
+
+ my $size1 = scalar @sorted_vals1;
+ my $size2 = scalar @sorted_vals2;
+
+ my $D = 0;
+ while(1){
+ my ($xlo1, $xhi1) = ($sorted_vals1[$i1], $sorted_vals1[$i1+1]);
+ my ($xlo2, $xhi2) = ($sorted_vals2[$i2], $sorted_vals2[$i2+1]);
+ die "$xlo1 > $xhi2 ??\n" if($xlo1 > $xhi2);
+ die "$xlo2 > $xhi1 ??\n" if($xlo2 > $xhi1);
+
+ my ($cume_prob1, $cume_prob2) = ($val_cumeprob1->{$xlo1}, $val_cumeprob2->{$xlo2});
+ my $abs_diff = abs($cume_prob1 - $cume_prob2);
+ $D = $abs_diff if($abs_diff > $D);
+ if($xhi1 <= $xhi2){
+ $i1++;
+ }elsif($xhi2 <= $xhi1){
+ $i2++;
+ }else{
+ die "$xhi1 xhi2 should be numerical.\n";
+ }
+ last if($i1 == $size1-1);
+ last if($i2 == $size2-1);
+ }
+ return $D;
+}
+
+# operates on a newick of form (3,(6,4))
+# i.e. no whitespace, no branch lengths, ids must be numbers.
+# so just parens, commas and numbers
+# puts the leaves in order, such that at each node the
+# subtree with smaller value is on left. The value of an
+# internal node is the min of the values of the two child
+# nodes, and the value of a leave is its id, which must be a number.
+sub order_newick{
+ my $newick = shift;
+ if ($newick =~ /^(\d+)$/) { # subtree is leaf!
+ return ($1, $newick);
+ } else { # subtree has > 1 leaf.
+ my %label_newick = ();
+ $newick =~ /^[(](.*)[)]$/;
+ my @newick_chars = split('',$1); # without surrounding ()
+ my $lmr_paren_count = 0;
+ my ($il, $ir) = (0, 0);
+ my $n_chars = scalar @newick_chars;
+ my $min_label = 10000000;
+ foreach (@newick_chars) {
+ die "$_ ", $newick_chars[$ir], " not same!\n" if($_ ne $newick_chars[$ir]);
+ if ($_ eq '(') {
+ $lmr_paren_count++;
+ }
+ if ($_ eq ')') {
+ $lmr_paren_count--;
+ }
+
+ if (($ir == $n_chars-1) or ($_ eq ',' and $lmr_paren_count == 0)) { #split
+ my $ilast = ($ir == $n_chars-1)? $ir : $ir-1;
+ my $sub_newick = join('', @newick_chars[$il..$ilast]);
+ # print "subnewick $sub_newick\n";
+ my ($label, $ordered_subnewick) = order_newick($sub_newick);
+ $label_newick{$label} = $ordered_subnewick;
+ $min_label = min($min_label, $label);
+ $il = $ir+1; $ir = $il; # skip the ','
+ } else {
+ $ir++;
+ }
+ } # loop over chars in @newick_chars
+ my $ordered_newick = '';
+ foreach (sort {$a <=> $b} keys %label_newick) {
+ $ordered_newick .= $label_newick{$_} . ",";
+ }
+ $ordered_newick =~ s/,$//;
+ $ordered_newick = '(' . $ordered_newick . ')';
+ return ($min_label, $ordered_newick);
+ }
+ die "shouldnt get here, in order_newick\n";
+}
+
+
+1;
+
+
diff --git a/lib/CXGN/Phylo/Node.pm b/lib/CXGN/Phylo/Node.pm
index 2fb36430..01b69967 100644
--- a/lib/CXGN/Phylo/Node.pm
+++ b/lib/CXGN/Phylo/Node.pm
@@ -29,7 +29,7 @@ use CXGN::Phylo::Species_name_map;
use CXGN::Phylo::Tree;
use CXGN::Phylo::Label;
use CXGN::Page::FormattingHelpers qw/tooltipped_text/;
-
+
=head2 function new()
@@ -686,12 +686,13 @@ sub get_standard_species {
my $tree = $self->get_tree();
if (defined $tree) {
my $species_standardizer = $tree->get_species_standardizer();
- if (defined $species_standardizer) {
+ if (defined $species_standardizer and defined ($species_standardizer->get_standard_name($species)) ) {
$species = $species_standardizer->get_standard_name($species);
- # print "species standardizer branch. species: $species \n";
+ # print "species standardizer branch. species: [$species] \n";
} else {
- $species = CXGN::Phylo::Species_name_map::to_standard_format($species); # just e.g. solanum lycopersicum -> Solanum_lycopersicum
- # print "to_standard_format branch: $species \n";
+# print "species: [$species]\n";
+ $species = CXGN::Phylo::Species_name_map->to_standard_format($species, 1); # just e.g. solanum lycopersicum -> Solanum_lycopersicum
+# print "to_standard_format branch: [$species] \n";
}
}
return $species;
@@ -999,13 +1000,14 @@ sub set_hidden {
=head2 function recursive_propagate_properties()
- Synopsis:
- Arguments:
+ Synopsis: $root->recursive_propagate_properties('hidden');
+ Arguments: A list of properties to be propagated. e.g.
+'hidden', but if no list given then hidden and hilited are propagated.
Returns:
Side effects:
Description: recursively propagates certain attributes to the
children nodes.
- currently, the propagated properties are:
+ currently, the default propagated properties are:
hidden, hilited
=cut
@@ -1016,15 +1018,24 @@ sub recursive_propagate_properties {
my $hidden = $self->get_hidden();
my $hilited = $self->get_hilited();
+ my @properties = @_;
+ unless (@properties){ @properties = ('hidden', 'hilited') }
+ # print STDERR "X [", scalar @properties, "] ", join("; ", @properties), "\n";
my @children = $self->get_children();
foreach my $c (@children) {
- if ($hidden) {
+ foreach my $prop (@properties){
+
+ if (($prop eq 'hidden') and $hidden) {
$c->set_hidden($hidden);
- }
- if ($hilited) {
+ }elsif (($prop eq 'hilited') and $hilited) {
+# print STDERR "In rec.propagate_properties.: ", $c->get_name(), " being set hilited\n";
$c->set_hilited($hilited);
+ }elsif(defined $self->get_attribute($prop)){
+ $c->set_attribute($prop, $self->get_attribute($prop));
}
- $c->recursive_propagate_properties();
+ }
+# print STDERR "Y [", scalar @properties, "] ", join("; ", @properties), "\n";
+ $c->recursive_propagate_properties(@properties);
}
}
@@ -1740,7 +1751,7 @@ sub recursive_set_implicit_species_bits{
my $bithash = shift; # this gives the bit pattern associated with each species
my $species_bits = int 0;
my $implicit_species = $self->get_implicit_species();
- # print "implicit species: ", join(" ", @$implicit_species), "\n";
+# print STDERR "\nZZZ. implicit species: ", join(" ", @$implicit_species), "\n";
my $a = int 0;
my $b = int 0;
foreach (@$implicit_species) {
@@ -1749,6 +1760,7 @@ sub recursive_set_implicit_species_bits{
# print STDERR "impl species: [$_], [", $bithash->{$_}, "]\n";
}
}
+# print STDERR "In rec set impl species bits. nodename: ", $self->get_name(), " species bit pattern: [", $species_bits, "]\n\n";
$self->set_attribute("species_bit_pattern", $species_bits);
foreach ($self->get_children()) {
$_->recursive_set_implicit_species_bits($bithash);
@@ -1909,11 +1921,11 @@ sub recursive_copy {
=cut
sub copy {
- my $self = shift;
+ my $self = shift; # this is the node to be copied
my $new_parent = shift;
my $new_tree = shift;
- my $new = CXGN::Phylo::Node->new();
+ my $new = CXGN::Phylo::Node->new(); # the copy
if ($new_parent) {
$new_parent->add_child_node($new);
@@ -1923,7 +1935,7 @@ sub copy {
$new->set_tree($new_tree);
$new->set_species($self->get_species());
$new->set_label($self->get_label()->copy());
- $new->set_node_key($self->get_node_key());
+ $new->set_node_key($self->get_node_key()); # the copy gets the same node key as the original
$new->set_hidden($self->get_hidden());
$new->set_hilited($self->get_hilited());
$new->set_horizontal_coord($self->get_horizontal_coord());
@@ -1934,7 +1946,8 @@ sub copy {
$new->set_branch_length($self->get_branch_length());
$new->set_attribute("leaf_count", $self->get_attribute("leaf_count"));
-$new->set_attribute("leaf_species_count", $self->get_attribute("leaf_species_count"));
+ $new->set_attribute("leaf_species_count", $self->get_attribute("leaf_species_count"));
+ $new->set_attribute("species_bit_pattern", $self->get_attribute("species_bit_pattern"));
return $new;
}
@@ -2003,7 +2016,8 @@ sub make_newick_attributes {
my $show_all = shift;
my $string = "";
# print "In Node::make_newick_attributes. ", join("; ", $self->get_tree()->newick_shown_attributes() ), "\n";
- foreach my $attr ( $self->get_tree()->newick_shown_attributes() ) {
+
+foreach my $attr ( $self->get_tree()->newick_shown_attributes() ) {
# print "in make_newick_attributes. attribute: $attr\n";
my $value = "";
if ($attr eq "species") { # species shown for leaves, or all nodes if $show_all
@@ -2359,7 +2373,7 @@ sub recursive_subtree_node_list {
=head2 function get_attribute()
- Synopsis: my $foo = $node->get_attribte("foo");
+ Synopsis: my $foo = $node->get_attribute("foo");
Arguments: the name of the attribute
Returns: the value of the attribute named foo.
Side effects: none
@@ -3139,14 +3153,20 @@ print("in robinson-foulds, root bls: ", $root1->get_branch_length(), " ", $roo
sub determine_species_from_name{
my $self = shift;
my $str = shift;
- my $species = undef;
+# my $species = undef;
if (!$str) { $str = $self->get_name(); }
+ my $id_taxon_map = $self->get_tree()->get_id_taxon_map();
+ my $species = $id_taxon_map->id_to_taxonname($str);
+
# print STDERR "string to get species from: ", $str, "\n";
- if ($str =~ /^At/i) { # At.... is Arabidopsis
- $species = "Arabidopsis";
+# if ($str =~ /^At/i) { # At.... is Arabidopsis
+# $species = "Arabidopsis";
+# }
+
+ if(defined $species){
+ # just keep this value of $species
}
-
# for $str of form SGN-U followed by digits,eliminate the SGN-U:
elsif ($str =~ /^SGN-{0,1}U(\d+)/i) { # should we require SGN to be initial?
@@ -3540,11 +3560,15 @@ sub recursive_compare_species_split{
my $self = shift; # a node of species tree, typically
my $a1 = shift;
my $a2 = shift;
+
+ if($a1 == 0 or $a2 == 0){ return 0; }; # one subtree has no species found in species tree
my @children = $self->get_children();
+# print "ZZZ [", scalar @children, "][", $self->get_attribute("species_bit_pattern"), "]  ";
+
return int 0 if(scalar @children < 2); # if reach leaf of species tree, no speciation.
my $b1 = $children[0]->get_attribute("species_bit_pattern");
my $b2 = $children[1]->get_attribute("species_bit_pattern");
- if (($a1 & ~$b1) == 0) { # a1 species set is subset of b1 species set
+ if (($a1 & ~$b1) == 0) { # a1 (gene tree) species set is subset of b1 species set
if (($a2 & ~$b2) == 0) {
return int 1;
} elsif (($a2 & ~$b1) == 0) {
@@ -3571,8 +3595,11 @@ sub recursive_compare_species_split{
sub collect_orthologs_of_leaf{
my $self = shift; # the leaf to start at
my @ortholog_array;
+ if ($self->get_attribute("species_bit_pattern") == 0){ return @ortholog_array; }
my $prev_parent = $self;
my $parent = $self->get_parent();
+# return @ortholog_array;
+# print "XXX: [", $parent->get_attribute("speciation"), "] ";
while (1) {
if ($parent->get_attribute("speciation")) {
#print join(";", $parent->get_implicit_names()), "\n";
@@ -3585,7 +3612,7 @@ sub collect_orthologs_of_leaf{
$n =~ s/(.*)?\|/$1/;
}
#print STDERR "implicit names: ", join(" ", @imp_names), "\n";
- @ortholog_array = (@ortholog_array, @imp_names);
+ @ortholog_array = (@ortholog_array, @imp_names) if ($self->get_attribute("species_bit_pattern") > 0);
}
}
last if($parent->is_root());
@@ -3610,6 +3637,7 @@ sub recursive_collect_max_speciation_nodes{
}
}
+# just recursively set_hilited for each speciation node.
sub recursive_hilite_speciation_nodes{
my $self = shift;
# return;
diff --git a/lib/CXGN/Phylo/Orthologger.pm b/lib/CXGN/Phylo/Orthologger.pm
new file mode 100644
index 00000000..c6022f43
--- /dev/null
+++ b/lib/CXGN/Phylo/Orthologger.pm
@@ -0,0 +1,250 @@
+package CXGN::Phylo::Orthologger;
+use strict;
+use List::Util qw ( min max sum );
+
+# use Devel::Cycle; # not gonna use this anymore?
+
+my $default_arg_href = {
+ 'reroot_method' => 'none', # choices are mindl maxmin minmax minvar
+ 'gene_tree' => undef,
+ 'gene_tree_newick' => undef,
+ 'species_tree' => undef, # tree object.
+ 'species_name_map' => undef,
+ 'query_species' => undef,
+ 'query_id_regex' => undef
+ };
+
+sub new {
+ my $class = shift;
+ my $arg_href = shift;
+
+ my $self = bless {}, $class;
+
+ # initialize parameters to defaults.
+ foreach (keys %$default_arg_href) {
+ $self->{$_} = $default_arg_href->{$_};
+ }
+ # reset any parameters specified in argument hash ref.
+ foreach (keys %$arg_href) {
+# print STDERR "param => value: $_ => ", $arg_href->{$_}, "\n";
+ $self->{$_} = $arg_href->{$_};
+ }
+ $self->set_species_name_map(CXGN::Phylo::Species_name_map->new()) unless(defined $self->get_species_name_map());
+
+ # get the gene tree
+ die "Must supply either a gene tree object, or gene tree newick string.\n"
+ if (!defined $self->get_gene_tree() and !defined $self->get_gene_tree_newick());
+ my $do_set_error = 0;
+ my $gene_tree;
+ if (defined $self->get_gene_tree()) {
+ # print "gene tree branch \n";
+ $gene_tree = $self->get_gene_tree();
+ } else {
+ my $gene_tree_newick = $self->get_gene_tree_newick();
+ $gene_tree = CXGN::Phylo::Parse_newick->new($gene_tree_newick, $do_set_error)->parse();
+ $gene_tree->show_newick_attribute('species');
+ if (!$gene_tree) {
+ die "Gene tree. Parser_newick->parse() failed to return a tree object.\n Newick string: ". $gene_tree_newick . "\n";
+ }
+ $self->set_gene_tree($gene_tree);
+ }
+ my $species_tree = $self->get_species_tree();
+ # tweak the gene tree object:
+ $gene_tree->impose_branch_length_minimum();
+ $gene_tree->get_root()->recursive_implicit_names();
+ $gene_tree->get_root()->recursive_implicit_species();
+ # if species is empty, (because no [species=x] in newick), get it from the name...
+ $gene_tree->set_missing_species_from_names();
+
+ $gene_tree->set_show_standard_species(1);
+ $gene_tree->update_label_names();
+ $gene_tree->set_species_standardizer($self->get_species_name_map());
+
+ # now tweak species tree:
+ $species_tree->set_missing_species_from_names(); # get species from name if species undef
+ $species_tree->impose_branch_length_minimum();
+ $species_tree->collapse_tree();
+ $species_tree->get_root()->recursive_implicit_names();
+ $species_tree->get_root()->recursive_implicit_species();
+
+ my $spec_bit_hash = $self->get_species_bithash();
+
+ $gene_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash);
+ $species_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash);
+
+
+ $species_tree->set_show_standard_species(1);
+ $species_tree->update_label_names();
+ $species_tree->set_species_standardizer($self->get_species_name_map());
+
+#print STDERR "in Orthogger->new. About to call reroot. \n";
+
+ $self->reroot();
+
+ $gene_tree->get_root()->recursive_set_leaf_species_count();
+ $gene_tree->get_root()->recursive_set_leaf_count();
+ $gene_tree->get_root()->recursive_implicit_species();
+ $gene_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash);
+
+ my $root_spec = $gene_tree->get_root()->speciation_at_this_node($species_tree);
+ if ($root_spec < 0) {
+ # if trifurcation at root, reroot to one of the neighboring branches
+ # if this will yield a speciation at root...
+ my $cn = ($gene_tree->get_root()->get_children())[$root_spec + 3];
+ my $bl = $cn->get_branch_length();
+ $gene_tree->reset_root_to_point_on_branch($cn, 0.5*$bl);
+
+ $gene_tree->get_root()->recursive_implicit_species();
+ $gene_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash);
+ }
+ $gene_tree->get_root()->recursive_implicit_names();
+ $gene_tree->get_root()->recursive_set_speciation($species_tree);
+ # $gene_tree->show_newick_attribute("species"); # should work now
+ $gene_tree->show_newick_attribute("speciation");
+ return $self;
+} # end of constructor
+
+sub reroot{
+ my $self = shift;
+ my $gene_tree = $self->get_gene_tree();
+ my $species_tree = $self->get_species_tree();
+ my $reroot_method = $self->get_reroot_method();
+ # print "top of reroot. reroot method: $reroot_method ; \n", $gene_tree->generate_newick(), "\n";
+ my @new_root_point = (undef, undef);
+ if (defined $reroot_method and $reroot_method ne 'none') {
+ # reset root
+ if ($reroot_method eq "mindl") { # min duplicate & loss
+ # $gene_tree->set_branch_lengths_equal(0.0001);
+
+ @new_root_point = $gene_tree->find_mindl_node($species_tree);
+ die "find_mindl_node failed\n" if(!defined $new_root_point[0]);
+ } elsif ($reroot_method eq "minvar") { # min variance
+ @new_root_point = $gene_tree->min_leaf_dist_variance_point();
+ } elsif ($reroot_method eq "maxmin") { # max min; max over possible pts in tree (along branches) of min over nodes of pt-node distance.
+ @new_root_point = $gene_tree->find_point_furthest_from_leaves();
+ } elsif ($reroot_method eq "minmax" or $reroot_method eq "midpoint") { # min max, aka midpoint (midpoint of longest leaf-leaf path)
+ @new_root_point = $gene_tree->find_point_closest_to_furthest_leaf();
+ }
+ $gene_tree->reset_root_to_point_on_branch(@new_root_point);
+ $gene_tree->get_root()->recursive_implicit_names(); # needed after rerooting?
+ }
+ return $gene_tree;
+}
+
+sub ortholog_result_string{
+ my $self = shift;
+ my $gene_tree = $self->get_gene_tree();
+ my $ortholog_str = '';
+ my $leaf_ortholog_hashref = $gene_tree->leaf_ortholog_table();
+ foreach my $leafname (keys %$leaf_ortholog_hashref) {
+ $ortholog_str .= "orthologs of " . $leafname . ": ";
+ my @orthologs = @{$leaf_ortholog_hashref->{$leafname}};
+ $ortholog_str .= join(" ", @orthologs) . "\n";
+ }
+ $ortholog_str .= "Leaves not in species tree: " . join(" ", keys %{$self->get_gene_tree()->non_speciestree_leafnode_names()}) . "\n";
+ return $ortholog_str;
+} # end of ortholog_result_string
+
+sub get_species_bithash{ # get a hash giving a bit pattern for each species which is in both $gene_tree and $spec_tree
+ my $self = shift;
+ return $self->get_gene_tree()->get_species_bithash($self->get_species_tree());
+} # end get_species_bithash
+
+sub get_expanded_subtrees{
+ my $self = shift;
+ my $node = shift;
+ my $treename = shift || 'Default_tree_name';
+ if (!defined $node->get_leaf_species_count()) {
+ $node->recursive_set_leaf_species_count();
+ }
+ my $expansion_report_string = '';
+ my $leaf_species_count = $node->get_attribute("leaf_species_count");
+ my $leaf_count = $node->get_attribute("leaf_count");
+
+ if (($leaf_species_count==1) && ($leaf_count>1)) {
+ my $sp = $node->recursive_get_a_leaf()->get_standard_species();
+ $expansion_report_string = "tree $treename has expansion of $leaf_count for species $sp\n";
+ return $expansion_report_string;
+ }
+ foreach my $c ($node->get_children()) {
+ $expansion_report_string .= $self->get_expanded_subtrees($c, $treename);
+ }
+ return $expansion_report_string;
+}
+
+sub decircularize{
+# call before Orthologger obj goes out of scope
+# so it can be garbage collected, to avoid memory leak.
+my $self = shift;
+$self->set_species_tree(undef);
+$self->get_gene_tree()->decircularize();
+$self->set_gene_tree(undef);
+}
+
+
+#accessors:
+
+sub get_gene_tree_newick{
+ my $self = shift;
+ return $self->{gene_tree_newick};
+}
+sub set_gene_tree_newick{
+ my $self = shift;
+ $self->{gene_tree_newick} = shift;
+}
+
+sub get_gene_tree{
+ my $self = shift;
+ return $self->{gene_tree};
+}
+sub set_gene_tree{
+ my $self = shift;
+ $self->{gene_tree} = shift;
+}
+
+sub get_species_tree{
+ my $self = shift;
+ return $self->{species_tree};
+}
+sub set_species_tree{
+ my $self = shift;
+ $self->{species_tree} = shift;
+}
+
+sub get_reroot_method{
+ my $self = shift;
+ return $self->{reroot_method};
+}
+sub set_reroot_method{
+ my $self = shift;
+ $self->{reroot_method} = shift;
+}
+
+sub get_species_name_map{
+ my $self = shift;
+ return $self->{species_name_map};
+}
+sub set_species_name_map{
+ my $self = shift;
+ $self->{species_name_map} = shift;
+}
+
+sub get_query_species{
+ my $self = shift;
+ return $self->{query_species};
+}
+sub set_query_species{
+ my $self = shift;
+ $self->{query_species} = shift;
+}
+
+sub get_query_id_regex{
+ my $self = shift;
+ return $self->{query_id_regex};
+}
+sub set_query_id_regex{
+ my $self = shift;
+ $self->{query_id_regex} = shift;
+}
+
+1;
diff --git a/lib/CXGN/Phylo/Overlap.pm b/lib/CXGN/Phylo/Overlap.pm
new file mode 100644
index 00000000..364722ee
--- /dev/null
+++ b/lib/CXGN/Phylo/Overlap.pm
@@ -0,0 +1,228 @@
+package Overlap;
+use strict;
+use List::Util qw ( min max sum );
+
+my $NO_ID = 'NOT_AN_ACTUAL_ID';
+
+sub new {
+ my $class = shift;
+ my $arg = shift; # either filename or string with contents of fasta file
+ my $fraction = shift || 0.8;
+ my $seed = shift || undef;
+ my $args= {};
+ my $self = bless $args, $class;
+
+ if (defined $seed) {
+ srand($seed);
+ } else {
+ srand();
+ }
+ my @ids = ();
+ my %id_overlapseq = ();
+ my %id_sequence = ();
+ my @lines = ();
+# print "arg: $arg\n";
+# print "arg is file? ", -f $arg, "\n";
+# print "XXX: ", $arg =~ /\n/, "\n";
+ # if ((! $arg =~ /\n/) and
+ if (!($arg =~ /\n/) and -f $arg) { # $arg is filename
+ open my $fhin, "<$arg";
+ @lines = <$fhin>;
+# print "XXXXXXXlines: ", join("\n", @lines), "\n";
+ close $fhin;
+
+ } else { # treat arg as string
+ @lines = split("\n", $arg);
+ }
+ my $id = $NO_ID;
+ my $sequence = '';
+ while (@lines) {
+ my $line = shift @lines;
+ # print $line;
+ if ($line =~ /^>/) {
+ if ($id ne $NO_ID) {
+ $id_sequence{$id} = $sequence;
+ push @ids, $id;
+ }
+ $id = $line;
+ $id =~ s/^>\s*//;
+ $id =~ s/\s+$//;
+ $sequence = '';
+ } else {
+ $line =~ s/^\s+//;
+ $line =~ s/\s+$//;
+ $sequence .= $line;
+ }
+ }
+ if (! exists $id_sequence{$id} and $sequence ne '') { # take care of the last id-sequence pair.
+ $id_sequence{$id} = $sequence;
+ push @ids, $id;
+ }
+ $self->{id_seq} = \%id_sequence;
+ $self->{ids} = \@ids;
+
+ my $n_sequences = scalar @ids;
+ my $seq_length = length $id_sequence{$ids[0]};
+ $self->{align_length} = $seq_length;
+ $self->{n_sequences} = $n_sequences;
+ my @position_counts = ((0) x $seq_length);
+ my @position_aas = (('') x $seq_length);
+ foreach my $id (@ids) {
+ my $sequence = $id_sequence{$id};
+ my $seql = length $sequence;
+ die "Non-equal sequence lengths in alignment: $seq_length, $seql. id: $id \nsequence: $sequence\n" if($seql ne $seq_length);
+
+ for (my $i = 0; $i < $seq_length; $i++) {
+ my $aa = substr($sequence, $i, 1);
+ if ($aa ne '-') {
+ $position_counts[$i]++;
+ if(!($position_aas[$i] =~ /$aa/)){
+ $position_aas[$i] .= $aa; # not invariant
+ }
+ }
+ }
+ }
+ my $n_invariant = 0;
+ foreach (@position_aas){
+ # print "[$_]\n";
+ $n_invariant++ if(length $_ == 1);
+ }
+# print "n_invariant: $n_invariant align length: ", scalar @position_aas, "\n";
+# print "pinv: ", $n_invariant/$seq_length, "\n";
+ $self->{position_counts} = \@position_counts;
+ my $n_required = ($fraction >= 1)? $n_sequences: int ($fraction * $n_sequences) + 1;
+ $self->{n_required} = $n_required;
+ my $overlap_length = 0;
+ my %id_overlapnongapcount = ();
+my $overlap_n_invariant = 0;
+ foreach my $position (0..@position_counts-1) {
+ my $count = $position_counts[$position];
+ if ($count >= $n_required) {
+ $overlap_length++;
+ foreach my $id (@ids) {
+ my $char = substr($id_sequence{$id}, $position, 1);
+ $id_overlapseq{$id} .= $char;
+ $id_overlapnongapcount{$id}++ if($char ne '-');
+ }
+ $overlap_n_invariant++ if(length $position_aas[$position] == 1);
+ }
+ }
+# print "overlap n_invariant: $overlap_n_invariant, length: $overlap_length\n";
+# print "overlap pinv: ", $overlap_n_invariant/$overlap_length, "\n";
+
+ $self->{id_overlapseq} = \%id_overlapseq;
+ $self->{id_overlapnongapcount} = \%id_overlapnongapcount;
+ die "overlap length inconsistency??? $overlap_length \n" if($overlap_length != length $id_overlapseq{$ids[0]});
+ $self->{overlap_length} = $overlap_length;
+ # $self->{ids} = \@ids;
+
+ return $self;
+}
+
+
+sub weed_sequences{
+ # weed out sequences which have poor overlap with others
+ my $self = shift;
+ my $fraction = shift || 0.3;
+ my $min_nongapcount = int($fraction * $self->{overlap_length});
+ #my %id_overlap_count = (); #
+ my @ids = $self->{id_overlapnongapcount};
+ foreach (@ids) {
+ if ($self->{id_overlapnongapcount}->{$_} < $min_nongapcount) {
+ # delete this sequence
+ delete $self->{id_overlapseq}->{$_};
+ delete $self->{id_overlapnongapcount}
+ }
+ }
+}
+
+
+sub align_fasta_string{
+ my $self = shift;
+ my $spacer = shift || '';
+ my $align_fasta = '';
+ foreach my $id (@{$self->{ids}}) {
+ my $sequence = $self->{id_seq}->{$id};
+ $align_fasta .= ">$spacer$id\n$sequence\n";
+ }
+ chomp $align_fasta;
+ return $align_fasta;
+}
+
+sub overlap_fasta_string{
+ my $self = shift;
+ my $spacer = shift || '';
+ my $overlap_fasta = '';
+ foreach my $id (@{$self->{ids}}) {
+ my $sequence = $self->{id_overlapseq}->{$id};
+ $overlap_fasta .= ">$spacer$id\n$sequence\n";
+ }
+ chomp $overlap_fasta;
+ return $overlap_fasta;
+}
+
+sub overlap_nexus_string{ # basic nexus format string for use by MrBayes.
+ my $self = shift;
+ my $n_leaves = scalar @{$self->{ids}};
+ my $overlap_length = length ($self->{id_overlapseq}->{$self->{ids}->[0]});
+ my $nexus_string = "#NEXUS\n" . "begin data;\n";
+ $nexus_string .= "dimensions ntax=$n_leaves nchar=$overlap_length;\n";
+ $nexus_string .= "format datatype=protein interleave=no gap=-;\n";
+ $nexus_string .= "matrix\n";
+
+ foreach my $id (@{$self->{ids}}) {
+ my $sequence = $self->{id_overlapseq}->{$id};
+ $id =~ s/[|].*//;
+#print "id, nexid: $id $nexid \n";
+
+ my $id50 = $id . " ";
+ $id50 = substr($id50, 0, 50);
+ $nexus_string .= "$id50$sequence\n";
+ }
+ $nexus_string .= "\n;\n\n" . "end;\n";
+ return $nexus_string;
+}
+
+sub bootstrap_overlap_fasta_string{
+ my $self = shift;
+ my $spacer = shift || '';
+ my %id_bootstrapoverlapseq = ();
+ my $overlap_length = $self->{overlap_length};
+
+ my @indices = ();
+ for (1..$overlap_length) {
+ my $index = int( rand($overlap_length) );
+ push @indices, $index;
+ # $index_count{$index}++;
+ }
+
+ for my $id (@{$self->{ids}}) {
+ my $std_overlap = $self->{id_overlapseq}->{$id};
+ my $string = '';
+ foreach my $index (@indices) {
+ $string .= substr($std_overlap, $index, 1);
+ }
+ $id_bootstrapoverlapseq{$id} = $string;
+ }
+
+ my $bofstring = '';
+ foreach my $id (@{$self->{ids}}) {
+ # print "id, seq: $id; $sequence\n";
+ my $sequence = $id_bootstrapoverlapseq{$id};
+ $bofstring .= ">$spacer$id\n$sequence\n";
+ }
+ chomp $bofstring;
+ return $bofstring;
+}
+
+sub get_overlap_length{
+ my $self = shift;
+ return $self->{overlap_length};
+}
+sub set_overlap_length{
+ my $self = shift;
+ $self->{overlap_length} = shift;
+}
+
+
+1;
diff --git a/lib/CXGN/Phylo/Parser.pm b/lib/CXGN/Phylo/Parser.pm
index 81d4592c..72d07379 100644
--- a/lib/CXGN/Phylo/Parser.pm
+++ b/lib/CXGN/Phylo/Parser.pm
@@ -112,6 +112,7 @@ $string = $1;
for (my $i=0; $i<@tokens; $i++) {
$self->set_error(\@tokens, $i) if($self->{do_set_error});
my $t = $tokens[$i];
+ next unless ($t =~ /\S/); # skip tokens with only whitespace
if ($t eq "(") {
#print STDERR "Encountered (. Creating a new child node [parent=".$current_node->get_name()."\n";
my $child = $current_node->add_child();
diff --git a/lib/CXGN/Phylo/Renderer.pm b/lib/CXGN/Phylo/Renderer.pm
index ab55806e..6d32aaa0 100644
--- a/lib/CXGN/Phylo/Renderer.pm
+++ b/lib/CXGN/Phylo/Renderer.pm
@@ -1,772 +1,791 @@
-=head1 Package CXGN::Phylo::Text_tree_renderer
-
-Abstract_tree_renderer - an interface to define tree renderers
-
-=head1 DESCRIPTION
-
-=head1 AUTHOR
-
-Lukas Mueller (lam87@cornell.edu)
-
-=cut
-
-=head1 Package Abstract_tree_renderer
-
-This class essentially defines an interface for tree renderers.
-It contains two functions: new() and render().
-Additional functions can be defined as necessary in the derived classes.
-
-=cut
-
-package CXGN::Phylo::Abstract_tree_renderer;
-
-use GD;
-GD::Image->trueColor(1);
-
-=head2 function new()
-
- Synopsis: abstract class from which tree renderer can be implemented.
- Arguments:
- Returns:
- Side effects:
- Description:
-
-=cut
-
-sub new {
- my $class = shift;
- my $tree = shift;
- my $args = {};
- my $self = bless $args, $class;
-
- # THE FOLLOWING WAS MOVED TO THE TREE OBJECT:
- # set a default layout object, which is Layout
- # (the tree is rendered from left to right)
- #
- #my $layout_object = CXGN::Phylo::Layout->new($tree);
- #$self->set_layout($layout_object);
-
- $self->set_tree($tree);
- return $self;
-}
-
-sub set_tree {
- my $self = shift;
- $self->{tree}=shift;
-}
-
-sub get_tree {
- my $self = shift;
- return $self->{tree};
-}
-
-# =head2 function get_layout()
-
-# Synopsis:
-# Arguments:
-# Returns:
-# Side effects:
-# Description: Accessor for the layout object for this renderer.
-
-# =cut
-
-# sub get_layout {
-# my $self=shift;
-# return $self->{layout};
-# }
-
-# =head2 function set_layout()
-
-# Synopsis:
-# Arguments: an instance of a Layout object or a subclass thereof.
-# Returns:
-# Side effects: the layout function determines how the tree is laid out,
-# the standard way is the root node is placed on the left
-# and the children on the right. Other layout objects will
-# lay out trees with other orientations.
-# Description: setter function for the layout object for this renderer.
-
-# =cut
-
-# sub set_layout {
-# my $self=shift;
-# $self->{layout}=shift;
-# }
-
-
-
-
-sub render {
- die "You are attempting to use the abstract class Abstract_tree_renderer. Please subclass.\n";
-}
-
-package CXGN::Phylo::Text_tree_renderer;
-
-use base qw/ CXGN::Phylo::Abstract_tree_renderer /;
-
-sub new {
- my $class = shift;
- my $self = $class->SUPER::new(@_);
- return $self;
-}
-
-=head2 function render()
-
- Synopsis:
- Arguments:
- Returns:
- Side effects:
- Description: a simple rendering of the tree, output to the STDOUT.
-
-=cut
-
-sub render {
- my $self = shift;
- $self->get_tree()->get_root()->recursive_text_render();
-}
-
-=head1 Package CXGN::Phylo::PNG_tree_renderer
-
-Renders tree as PNG
-
-=cut
-
-package CXGN::Phylo::PNG_tree_renderer;
-
-use base qw/ CXGN::Phylo::Abstract_tree_renderer /;
-
-=head2 function new()
-
- Synopsis:
- Arguments: a tree object
- Returns: An instance of a PNG_tree_renderer object.
- Side effects: Sets some defaults for drawing the tree.
- These can be overridden by calling the corresponding
- setter functions.
- Description:
-
-=cut
-
-sub new {
- my $class = shift;
- my $self = $class -> SUPER::new(@_);
- $self->set_font(GD::Font->Small());
- $self->set_transparent(0);
-
- $self->{bl_labels} = []; #need to keep array of these for image map
-
- return $self;
-}
-
-=head2 function get_font()
-
- Synopsis:
- Arguments:
- Returns:
- Side effects:
- Description:
-
-=cut
-
-sub get_font {
- my $self=shift;
- return $self->{font};
-}
-
-=head2 function set_font()
-
- Synopsis: $r->set_font($font)
- Arguments: a GD font
- Returns: nothing
- Side effects: $font will be used for rendering the tree labels
- Description:
-
-=cut
-
-sub set_font {
- my $self=shift;
- $self->{font}=shift;
-}
-
-=head2 function get_font_width
-
- Synopsis:
- Arguments:
- Returns:
- Side effects:
- Description:
-
-=cut
-
-sub get_font_width {
- my $self=shift;
- return $self->get_font()->width();
-}
-
-=head2 function get_font_height
-
- Synopsis:
- Arguments:
- Returns:
- Side effects:
- Description:
-
-=cut
-
-sub get_font_height {
- my $self=shift;
- return $self->get_font()->height();
-}
-
-=head2 function get_show_branch_length()
-
- Synopsis:
- Arguments:
- Returns:
- Side effects:
- Description:
-
-=cut
-
-sub get_show_branch_length {
- my $self=shift;
- return $self->{show_branch_length};
-}
-
-=head2 function set_show_branch_length()
-
- Synopsis:
- Arguments:
- Returns:
- Side effects:
- Description:
-
-=cut
-
-sub set_show_branch_length {
- my $self=shift;
- $self->{show_branch_length}=shift;
-}
-
-sub hide_alignment {
- my $self = shift;
- $self->{hide_alignment} = 1;
-}
-
-sub show_alignment {
- my $self = shift;
- $self->{hide_alignment} = 0;
-}
-
-sub set_transparent {
- my $self = shift;
- $self->{transparent} = shift;
-}
-
-sub get_transparent {
- my $self = shift;
- return $self->{transparent};
-}
-
-=head2 function render()
-
- Synopsis:
- Arguments: (optional) a boolean for printing labels for all nodes (default- label is printed only for leaves, unless get_hide_label() = true )
- Returns: a png image
- Side effects: creates a GD::Image object and renders the tree on it.
- Description: draws the tree
-
-=cut
-
-sub render {
- my $self = shift;
- my $print_all_labels=shift;
-# GD::Image->trueColor(1);
- # initialize image
- my $tree = $self->get_tree;
- my $layout = $tree->get_layout;
- $layout->layout();
- my ($phylo_width, $phylo_height) = ($layout->get_image_width, $layout->get_image_height);
-
-
- my $image = GD::Image->new($phylo_width,$phylo_height, 1);
- my $white = $image->colorResolve(255,255,255);
-# if($self->{transparent}){
-# $image->transparent($white);
-# }
- $image->filledRectangle(0 , 0, $phylo_width, $phylo_height, $white);
- $tree->get_root()->recursive_propagate_properties();
- # get the font
- #
- my $font = GD::Font->Small();
- if ($layout->get_vertical_gap()<12) {
- $font= GD::Font->Tiny();
- }
- $self->set_font($font);
- # percolate font information to all nodes
- foreach my $n ($tree->get_all_nodes()) {
- $n->get_label()->set_font($self->get_font);
- }
-
- my $color = $image->colorResolve($tree->get_line_color());
-
- if ($tree->get_alignment && !$self->{hide_alignment}) {
- my $alignment = $tree->get_alignment;
- my $partition_width;
- if($alignment->{label_shown}){
- $partition_width = $phylo_width/3;
- }
- else {
- $partition_width = $phylo_width/2;
- }
- print STDERR "Showing Alignment...\n";
-
- #Calculate node coordinates:
- $layout->set_image_width($partition_width);
- $layout->layout();
-
- #Set Alignment Image attributes
-
-
- $alignment->set_image($image);
- $alignment->set_display_type("alignment"); #alignment and ruler only
- $alignment->set_left_margin($partition_width+5);
- my $label_spacer = 5;
- my $gap = $label_spacer;
-# if($alignment->{label_shown}){
-# $gap = $alignment->_calculate_label_gap + $label_spacer;
-# }
- $alignment->set_width($partition_width);
- $alignment->set_height($layout->get_image_height);
- $alignment->_add_ruler();
- $alignment->{width_adjustment} = 0;
- $alignment->_add_ruler();
- $alignment->{ruler}->set_width($partition_width-$gap);
- $alignment->{ruler}->set_label_spacer($label_spacer);
- $alignment->{ruler}->hide_unit();
- $alignment->{ruler}->set_top_margin($alignment->{ruler}->{font}->height());
-
- my %member_is_shown = ();
- #Position each alignment member
- foreach my $leaf ($tree->get_leaf_list()) {
- my $m = $leaf->get_alignment_member;
- next unless $m;
- $m->set_width($partition_width-$gap);
- $m->set_label_spacer($label_spacer);
- my $height = $layout->get_vertical_gap;
-# $height = 20 if($height>20);
- $m->set_height($height);
- $m->set_top_margin($leaf->get_Y);
- my $id = $m->get_id;
- $member_is_shown{$id} = 1;
- }
-
- foreach my $m (@{$alignment->{members}}){
- my $id = $m->get_id;
- $m->hide_seq unless $member_is_shown{$id};
- }
-
- #Render the alignment on the set image
- $alignment->render();
- }
- $self->recursive_draw($tree->get_root(), $image, $color, "", $print_all_labels );
- return $image->png();
-}
-
-
-
-sub recursive_draw {
- my $self = shift;
- my $node = shift;
- my $image = shift;
- my $color = shift;
- my $parent_hilited = shift;
- my $print_all_labels=shift;
-
- my $line_color = $color;
-
- my @children = $node->get_children();
-
+
+=head1 Package CXGN::Phylo::Text_tree_renderer
+
+Abstract_tree_renderer - an interface to define tree renderers
+
+=head1 DESCRIPTION
+
+=head1 AUTHOR
+
+Lukas Mueller (lam87@cornell.edu)
+
+=cut
+
+=head1 Package Abstract_tree_renderer
+
+This class essentially defines an interface for tree renderers.
+It contains two functions: new() and render().
+Additional functions can be defined as necessary in the derived classes.
+
+=cut
+
+package CXGN::Phylo::Abstract_tree_renderer;
+
+use List::Util qw ( min max sum );
+use GD;
+
+GD::Image->trueColor(1);
+
+=head2 function new()
+
+ Synopsis: abstract class from which tree renderer can be implemented.
+ Arguments:
+ Returns:
+ Side effects:
+ Description:
+
+=cut
+
+sub new {
+ my $class = shift;
+ my $tree = shift;
+ my $args = {};
+ my $self = bless $args, $class;
+
+ # THE FOLLOWING WAS MOVED TO THE TREE OBJECT:
+ # set a default layout object, which is Layout
+ # (the tree is rendered from left to right)
+ #
+ #my $layout_object = CXGN::Phylo::Layout->new($tree);
+ #$self->set_layout($layout_object);
+
+ $self->set_tree($tree);
+ return $self;
+}
+
+sub set_tree {
+ my $self = shift;
+ $self->{tree} = shift;
+}
+
+sub get_tree {
+ my $self = shift;
+ return $self->{tree};
+}
+
+# =head2 function get_layout()
+
+# Synopsis:
+# Arguments:
+# Returns:
+# Side effects:
+# Description: Accessor for the layout object for this renderer.
+
+# =cut
+
+# sub get_layout {
+# my $self=shift;
+# return $self->{layout};
+# }
+
+# =head2 function set_layout()
+
+# Synopsis:
+# Arguments: an instance of a Layout object or a subclass thereof.
+# Returns:
+# Side effects: the layout function determines how the tree is laid out,
+# the standard way is the root node is placed on the left
+# and the children on the right. Other layout objects will
+# lay out trees with other orientations.
+# Description: setter function for the layout object for this renderer.
+
+# =cut
+
+# sub set_layout {
+# my $self=shift;
+# $self->{layout}=shift;
+# }
+
+sub render {
+ die "You are attempting to use the abstract class Abstract_tree_renderer. Please subclass.\n";
+}
+
+package CXGN::Phylo::Text_tree_renderer;
+
+use base qw/ CXGN::Phylo::Abstract_tree_renderer /;
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ return $self;
+}
+
+=head2 function render()
+
+ Synopsis:
+ Arguments:
+ Returns:
+ Side effects:
+ Description: a simple rendering of the tree, output to the STDOUT.
+
+=cut
+
+sub render {
+ my $self = shift;
+ $self->get_tree()->get_root()->recursive_text_render();
+}
+
+=head1 Package CXGN::Phylo::PNG_tree_renderer
+
+Renders tree as PNG
+
+=cut
+
+package CXGN::Phylo::PNG_tree_renderer;
+
+use base qw/ CXGN::Phylo::Abstract_tree_renderer /;
+
+=head2 function new()
+
+ Synopsis:
+ Arguments: a tree object
+ Returns: An instance of a PNG_tree_renderer object.
+ Side effects: Sets some defaults for drawing the tree.
+ These can be overridden by calling the corresponding
+ setter functions.
+ Description:
+
+=cut
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ $self->set_font( GD::Font->Small() );
+ $self->set_transparent(0);
+
+ $self->{bl_labels} = []; #need to keep array of these for image map
+
+ return $self;
+}
+
+=head2 function get_font()
+
+ Synopsis:
+ Arguments:
+ Returns:
+ Side effects:
+ Description:
+
+=cut
+
+sub get_font {
+ my $self = shift;
+ return $self->{font};
+}
+
+=head2 function set_font()
+
+ Synopsis: $r->set_font($font)
+ Arguments: a GD font
+ Returns: nothing
+ Side effects: $font will be used for rendering the tree labels
+ Description:
+
+=cut
+
+sub set_font {
+ my $self = shift;
+ $self->{font} = shift;
+}
+
+=head2 function get_font_width
+
+ Synopsis:
+ Arguments:
+ Returns:
+ Side effects:
+ Description:
+
+=cut
+
+sub get_font_width {
+ my $self = shift;
+ return $self->get_font()->width();
+}
+
+=head2 function get_font_height
+
+ Synopsis:
+ Arguments:
+ Returns:
+ Side effects:
+ Description:
+
+=cut
+
+sub get_font_height {
+ my $self = shift;
+ return $self->get_font()->height();
+}
+
+=head2 function get_show_branch_length()
+
+ Synopsis:
+ Arguments:
+ Returns:
+ Side effects:
+ Description:
+
+=cut
+
+sub get_show_branch_length {
+ my $self = shift;
+ return $self->{show_branch_length};
+}
+
+=head2 function set_show_branch_length()
+
+ Synopsis:
+ Arguments:
+ Returns:
+ Side effects:
+ Description:
+
+=cut
+
+sub set_show_branch_length {
+ my $self = shift;
+ $self->{show_branch_length} = shift;
+}
+
+sub hide_alignment {
+ my $self = shift;
+ $self->{hide_alignment} = 1;
+}
+
+sub show_alignment {
+ my $self = shift;
+ $self->{hide_alignment} = 0;
+}
+
+sub set_transparent {
+ my $self = shift;
+ $self->{transparent} = shift;
+}
+
+sub get_transparent {
+ my $self = shift;
+ return $self->{transparent};
+}
+
+=head2 function render()
+
+ Synopsis:
+ Arguments: (optional) a boolean for printing labels for all nodes (default- label is printed only for leaves, unless get_hide_label() = true )
+ Returns: a png image
+ Side effects: creates a GD::Image object and renders the tree on it.
+ Description: draws the tree
+
+=cut
+
+sub render {
+ my $self = shift;
+ my $print_all_labels = shift;
+
+ # initialize image
+ my $tree = $self->get_tree;
+ my $layout = $tree->get_layout;
+ $layout->layout();
+ my ( $phylo_width, $phylo_height ) = ( $layout->get_image_width, $layout->get_image_height );
+
+ my $image = GD::Image->new( $phylo_width, $phylo_height, 1 );
+ my $white = $image->colorResolve( 255, 255, 255 );
+
+ $image->filledRectangle( 0, 0, $phylo_width, $phylo_height, $white );
+ $tree->get_root()->recursive_propagate_properties('hidden');
+
+ # get the font
+ #
+ my $font = GD::Font->Small();
+ if ( $layout->get_vertical_gap() < 12 ) {
+ $font = GD::Font->Tiny();
+ }
+ $self->set_font($font);
+
+ # percolate font information to all nodes
+ foreach my $n ( $tree->get_all_nodes() ) {
+ $n->get_label()->set_font( $self->get_font );
+ }
+
+ my $color = $image->colorResolve( $tree->get_line_color() );
+
+ if ( $tree->get_alignment && !$self->{hide_alignment} ) {
+ my $alignment = $tree->get_alignment;
+ my $partition_width;
+ if ( $alignment->{label_shown} ) {
+ $partition_width = $phylo_width / 3;
+ } else {
+ $partition_width = $phylo_width / 2;
+ }
+ print STDERR "Showing Alignment...\n";
+
+ #Calculate node coordinates:
+ $layout->set_image_width($partition_width);
+ $layout->layout();
+
+ #Set Alignment Image attributes
+ $alignment->set_image($image);
+ $alignment->set_display_type("alignment"); #alignment and ruler only
+ $alignment->set_left_margin( $partition_width + 5 );
+ my $label_spacer = 5;
+ my $gap = $label_spacer;
+
+ $alignment->set_width($partition_width);
+ $alignment->set_height( $layout->get_image_height );
+ $alignment->_add_ruler();
+ $alignment->{width_adjustment} = 0;
+ $alignment->_add_ruler();
+ $alignment->{ruler}->set_width( $partition_width - $gap );
+ $alignment->{ruler}->set_label_spacer($label_spacer);
+ $alignment->{ruler}->hide_unit();
+ $alignment->{ruler}->set_top_margin( $alignment->{ruler}->{font}->height() );
+
+ my %member_is_shown = ();
+
+ #Position each alignment member
+ foreach my $leaf ( $tree->get_leaf_list() ) {
+ my $m = $leaf->get_alignment_member;
+ next unless $m;
+ $m->set_width( $partition_width - $gap );
+ $m->set_label_spacer($label_spacer);
+ my $height = $layout->get_vertical_gap;
+
+ # $height = 20 if($height>20);
+ $m->set_height($height);
+ $m->set_top_margin( $leaf->get_Y );
+ my $id = $m->get_id;
+ $member_is_shown{$id} = 1;
+ }
+
+ foreach my $m ( @{ $alignment->{members} } ) {
+ my $id = $m->get_id;
+ $m->hide_seq unless $member_is_shown{$id};
+ }
+
+ #Render the alignment on the set image
+ $alignment->render();
+ }
+ $self->recursive_draw( $tree->get_root(), $image, $color, $print_all_labels );
+ return $image->png();
+}
+
+sub recursive_draw {
+ my $self = shift;
+ my $node = shift;
+ my $image = shift;
+ my $color = shift;
+ # my $parent_hilited = shift; # why is this needed ????
+ my $print_all_labels = shift;
+
+ my $line_color = $color;
+
+ my @children = $node->get_children();
+
#print STDERR "Node: ".$node->get_name()." X: ".$node->get_X()." Y: ".$node->get_Y()."\n";
-
- # generate the color for the clickable area of the nodes
- #
- my $node_color = $image->colorResolve(150,150,250); # color for normal (non-hilited) nodes
-
- # get the hilite color for the hilited nodes
- #
-# print STDERR "name: ", $node->get_name(), "node implicit names:[", join(":", $node->get_implicit_names()), "] speciation:[", $node->get_attribute("speciation"), "] ";
- if ($node->get_hilited() ){ #or $node->get_attribute("speciation") ) {
- $color = $image->colorResolve($self->get_tree()->get_hilite_color()); # this is the color of the hilited subtree
- if (!$parent_hilited or $node->get_attribute("speciation")) {
- #Original Hilited Node
- my @hcolor = $self->get_tree()->get_hilite_color();
- my $max = 0;
- #Saturate the predominant color component
- foreach (@hcolor) {
- $max = $_ if $_>=$max;
- }
- foreach (@hcolor) {
- if ($_>=$max) {
- $_ = 255;
- } else {
- $_ *= 0.6;
- }
- }
- $node_color = $image->colorResolve(@hcolor); # color of root of hilited subtree (bright red)
- } else {
- $node_color = $color; # color of rest of hilited subtree (dull red)
- }
- }
-
- if ($node->is_root()) {
- my @line_color = $node->get_line_color();
- # print STDERR "line color: ", join(";", @line_color), "\n";
- $line_color = $image->colorResolve(@line_color) if (@line_color==3);
- my $model = $node->get_attribute("model");
- if ($model) {
- my $color_array = $self->get_model_color($model);
- $line_color = $image->colorResolve(@$color_array) if (ref($color_array) eq "ARRAY");
- }
- $line_color = $color if $node->get_hilited();
- $self->connect_nodes(undef, $node, $image, $line_color);
- }
-
- unless($node->get_hidden()){# (don't draw children of hidden nodes)
- foreach my $c (@children) {
- my @line_color = $c->get_line_color();
- $line_color = $image->colorResolve(@line_color) if (@line_color==3);
- my $model = $c->get_attribute("model");
- if ($model) {
- my $color_array = $self->get_model_color($model);
- $line_color = $image->colorResolve(@$color_array) if (ref($color_array) eq "ARRAY");
- }
- $line_color = $color if $node->get_hilited();
- # print STDERR "line color: ", join(";", @line_color), "\n";
- $self->connect_nodes($node, $c, $image, $line_color);
- $self->recursive_draw($c, $image, $color,$node->get_hilited(), $print_all_labels);
- }
- }
-
- if (!($node->is_leaf() || ($node->is_hidden()))) {
- $image -> filledRectangle(($node->get_X()-2), ($node->get_Y()-2),
- ($node->get_X()+2), ($node->get_Y+2), $node_color);
- } elsif ($node->is_leaf()) {
- $image -> filledArc(($node->get_X()), ($node->get_Y()), 7, 7, 0, 360, $node_color);
- # my $label = $node->get_label;
- # my $layout = $self->get_tree()->get_layout();
- # my $begin_x = $label->{font}->width()*length($label->get_name()) + $node->get_X + 5;
- # $begin_x = int($begin_x);
- # $image->line($begin_x, $node->get_Y, $begin_x+50, $node->get_Y, $node_color);
-
- }
-
- # if the node is hidden, draw the hidden symbol
- #
- if ($node->get_hidden()) {
- $image -> rectangle(($node->get_X()-3), ($node->get_Y()-3),
- ($node->get_X()+3), ($node->get_Y()+3), $color);
-
- $image -> line(($node->get_X()), ($node->get_Y()-2),
- ($node->get_X()), ($node->get_Y()+2), $color);
- $image -> line(($node->get_X()-2), ($node->get_Y()),
- ($node->get_X()+2), ($node->get_Y()), $color);
- }
-
- # draw the label if it it visible
- #
- if ( (!$node->get_hide_label() and $node->is_leaf()) || $print_all_labels ) {
- $node->get_label()->set_reference_point(($node->get_X()), ($node->get_Y()));
- if (!$node->is_leaf() && $node->get_children() % 2 > 0) {
- $node->get_label()->set_reference_point(($node->get_X()), ($node->get_Y()-1-$node->get_label()->get_font()->height/2));
- }
- $node->get_label()->set_orientation_horizontal();
- $node->get_label()->render($image);
- }
-}
-
-=head2 function connect_nodes()
-
- Synopsis:
- Arguments: a parent node object, a child node object, a GD image object and a GD color
- Returns:
- Side effects: draws the connection between a parent and a child node.
- Description: the default style is a connection that is broken into to perpendicular
- lines. This can be overridden in subclasses to draw different
- styles of trees
-
-=cut
-
-sub connect_nodes {
- my $self = shift;
- my $p_node = shift;
- my $c_node = shift;
- my $image = shift;
- my $color = shift;
-
- if (!$p_node) {
- $p_node=$c_node->copy();
- $p_node->set_X($self->get_tree()->get_layout()->get_left_margin());
- }
- $self->display_branch_length($image, $p_node, $c_node, $color);
- $image->setAntiAliased($color);
- $image -> line ($p_node->get_X(), $p_node->get_Y(),
- $p_node->get_X(), $c_node->get_Y(), $color);
-
- $image -> line ($p_node->get_X(), $c_node->get_Y(),
- $c_node->get_X(), $c_node->get_Y(), $color);
-
-}
-
-sub display_branch_length {
- my $self = shift;
-
- # if we are supposed to display the branch lengths, we do so...
- return unless $self->get_show_branch_length();
-
- my $image = shift;
- my $p_node = shift;
- my $c_node = shift;
- my $color = shift;
-
-
- my $label_text = $c_node->get_branch_length();
- my $branch_length_label = CXGN::Phylo::Label->new($label_text);
-
-
- my @color = (200, 200, 200);
- my $blc_sum = 300; # the branch length color will be the branch color normalized s.t. sum of r,g and b = blc_sum; so make this smaller if branch lengths are too light to read easily
- if ($color) {
- @color = $image->rgb($color);
- my $bc_sum = ($color[0] + $color[1] + $color[2]);
- my $norm_factor = ($bc_sum == 0)? 1: $blc_sum/$bc_sum;
- $norm_factor = 1 if($norm_factor > 1);
- @color = map { $_*$norm_factor } @color; #branch color normalized to have sum of r,g,b=tsum
-
- my $model = $c_node->get_attribute("model");
- if ($model =~ /\w+/) {
- $branch_length_label->set_tooltip("Calculated by " . ucfirst($model) . " model");
- }
- }
-
- $branch_length_label->set_reference_point(
- $self->get_blen_ref_pt($p_node, $c_node, $label_text)
- );
- $branch_length_label->set_font(GD::Font->Tiny());
- $branch_length_label->set_hidden(0);
- $branch_length_label->align_center();
- $branch_length_label->set_text_color(@color);
-
- $branch_length_label->render($image);
-
- push(@{$self->{bl_labels}}, $branch_length_label);
-}
-
-sub get_blen_ref_pt {
- my $self = shift;
- my $p_node = shift;
- my $c_node = shift;
-my $bl_scaled = ($c_node->get_transformed_branch_length($self->get_tree()->get_shown_branch_length_transformation()) * $self->get_tree()->get_layout()->get_horizontal_scaling_factor());
- return (
- $c_node->get_X()-$bl_scaled/2,
- $c_node->get_Y()-$self->get_font_height()/2
- );
-}
-
-
-sub hilite_model {
- my $self = shift;
- my $model = shift;
- my $color_array = shift;
- $self->{model_colors}->{$model} = $color_array;
-}
-
-sub get_model_color {
- my $self = shift;
- my $model = shift;
- my $color_array = $self->{model_colors}->{$model};
- return unless (ref($color_array) eq "ARRAY");
- return $color_array if (@$color_array == 3);
-}
-
-=head2 function get_html_image_map()
-
- Synopsis: my $image_map=$renderer->get_html_image_map($name, $temp_filename, $hilite_temp, $align_type );
- Arguments: image_map name, filename
- Returns: a html imagemap that can be used to embed links into the tree image.
- Side effects:
- Description: use $node->set_link($url) to set the link to $url.
-
-=cut
-
-sub get_html_image_map {
-# print STDERR "PNG_tree_renderer: in: get_html_image_map()\n";
- my $self=shift;
- my $name =shift;
- my ($whole_temp, $hilite_temp, $align_type) = @_;
- my $map = $self->recursive_image_map_coords($self->get_tree()->get_root());
- my $tree = $self->get_tree();
-
- #Image Map portion for Alignment, if align temp file provided:
- if($tree->get_alignment && $whole_temp){
- $hilite_temp = $whole_temp unless $hilite_temp;
- foreach my $l ($tree->get_leaf_list){
- my $m = $l->get_alignment_member();
- next unless $m;
- my $temp = "";
- my $title = "";
- ($l->get_hilited)?($temp = $hilite_temp):($temp = $whole_temp);
- ($l->get_hilited)?($title = "See alignment for highlighted region only"):($title = "See alignment for all visible members");
-
- ($temp) = $temp =~ /([^\/]+)$/;
- my $coords = join ",", ($m->get_enclosing_rect);
- my $url = "/tools/align_viewer/show_align.pl?temp_file=$temp&type=$align_type&title=Selection%20From%20Tree";
- my $target = "_SGN_ALIGN_" . int(rand(999999999));
- $map .= "\n";
- }
- }
- foreach my $bl_label (@{$self->{bl_labels}}){
- next unless $bl_label->get_tooltip();
- my $coords = join ",", ($bl_label->get_enclosing_rect);
- #print STDERR "--------RENDERED.PM------". $bl_label->get_onmouseover()."------";
- $map .= "\n";
- }
-
- my $maptag = qq( \n );
- return $maptag;
-}
-
-sub recursive_image_map_coords {
- my $self = shift;
- my $node =shift;
-
- my $tooltip = $node->get_tooltip();
- $tooltip ||= "Node " . $node->get_node_key() . ": " . $node->get_label()->get_name();
- #print STDERR "RECURSIVE_IMAGE_MAP_COORDS\n";
- my $map = do { no warnings 'uninitialized';
- "get_name()."\" coords=\""
- . int(($node->get_X())-3) . ",".int(($node->get_Y())-3).",".int(($node->get_X())+3).",".int(($node->get_Y())+3)
- . "\" href=\"".$node->get_link()
- ."\" title=\"$tooltip\" onmouseover=\"".$node->get_onmouseover()."\" onmouseout=\"".$node->get_onmouseout()."\" alt=\"$tooltip\" />\n"};
-
- $map .= $node->get_label()->get_html_image_map();
- my @children = $node->get_children();
- foreach my $c (@children) {
- #print STDERR "checking out the children...\n";
- $map .= $self->recursive_image_map_coords($c);
- }
- return $map;
-}
-
-
-
-package CXGN::Phylo::PNG_angle_tree_renderer;
-
-use GD;
-
-use base qw/ CXGN::Phylo::PNG_tree_renderer /;
-
-sub new {
- my $class = shift;
- my $self = $class -> SUPER::new(@_);
- return $self;
-}
-
-sub connect_nodes {
- my $self = shift;
- my $p_node= shift;
- my $c_node = shift;
- my $image = shift;
- my $color = shift;
-
-
- if (!$p_node) {
- $p_node=$c_node->copy();
- $p_node->set_X($self->get_tree()->get_layout()->get_left_margin());
- }
- $self->display_branch_length($image, $p_node, $c_node, $color);
- $image->setAntiAliased($color);
- $image->line ($p_node->get_X(),
- $p_node->get_Y(),
- $c_node->get_X(),
- $c_node->get_Y(),gdAntiAliased);
-
-
-
-}
-
-sub get_blen_ref_pt {
- my ($self, $p_node, $c_node, $text) = @_;
-
- my ($dx, $dy) = ($c_node->get_X() - $p_node->get_X(), abs($c_node->get_Y()-$p_node->get_Y()));
- my $angle = atan2($dy, $dx);
- my $pi = 3.14159265359;
- my $max = $pi/2;
- my $halfw = (length($text)*$self->get_font()->width)/3;
- my $x_shift = -$halfw*($angle)/$max;
-
- my $flip = 1;
- $flip = -1 if ($c_node->get_Y() > $p_node->get_Y());
-
-
-my $bl_scaled = ($c_node->get_transformed_branch_length($self->get_tree()->get_shown_branch_length_transformation()) * $self->get_tree()->get_layout()->get_horizontal_scaling_factor());
-
- return (
- $c_node->get_X() - $bl_scaled/2 + $x_shift,
- ($c_node->get_Y()+$p_node->get_Y())/2 - $flip*$self->get_font_height()/2
- );
-}
-
-package CXGN::Phylo::PNG_round_tree_renderer;
-use GD;
-use base qw/ CXGN::Phylo::PNG_tree_renderer /;
-sub new {
- my $class = shift;
- my $self = $class -> SUPER::new(@_);
- return $self;
-}
-
-sub connect_nodes {
- my $self = shift;
- my $p_node= shift;
- my $c_node = shift;
- my $image = shift;
- my $color = shift;
-
-
- if (!$p_node) {
- $p_node=$c_node->copy();
- $p_node->set_X($self->get_tree()->get_layout()->get_left_margin());
- }
-
- $self->display_branch_length($image, $p_node, $c_node, $color);
-
- $image->setAntiAliased($color);
- $image -> arc($c_node->get_X(),
- $p_node->get_Y(),
- ($c_node->get_X()-$p_node->get_X())*2,
- ($p_node->get_Y()-$c_node->get_Y())*2,
- 180,
- 270,
- gdAntiAliased
- );
-}
-
-sub get_blen_ref_pt {
- my ($self, $p_node, $c_node, $text) = @_;
- my ($dx, $dy) = ($c_node->get_X() - $p_node->get_X(), abs($c_node->get_Y()-$p_node->get_Y()));
- my $angle = atan2($dy, $dx);
- my $pi = 3.14159265359;
- my $max = $pi/2;
- my $halfw = (length($text)*$self->get_font()->width)/2;
- my $x_shift = -$halfw*($angle)/$max;
-
- my $flip = 1;
- $flip = -1 if ($c_node->get_Y() > $p_node->get_Y());
-
- #Try putting at "center of mass" of right triangle:
- return ( $p_node->get_X()+($c_node->get_X() - $p_node->get_X())/3 +$x_shift,
- $c_node->get_Y()-($c_node->get_Y() - $p_node->get_Y())/3 - $flip*($self->get_font()->height()/2) );
-}
-
-1;
+
+ # generate the color for the clickable area of the nodes
+
+ # set node_color to default color for normal (non-hilited) nodes
+ my $node_color = $image->colorResolve( 150, 150, 250 );
+
+ # get the hilite color for the hilited nodes
+ #
+# print STDERR "name: ", $node->get_name(), " node implicit names:[",
+# join( ":", @{ $node->get_implicit_names() } ), "] speciation:[",
+# $node->get_attribute("speciation"), "] hilite:[", $node->get_hilited(),
+# "]\n";
+
+ # if node's subtree has at least one species in species tree ...
+ if ( $node->get_hilited()
+ and ( $node->get_attribute('species_bit_pattern') != 0 ) )
+ { #or $node->get_attribute("speciation") ) {
+ my @hcolor = $self->get_tree()->get_hilite_color();
+
+ # this is the (dull red) color of the hilited subtree:
+ $color = $image->colorResolve(@hcolor);
+
+ # if ( # does this: '(!$parent_hilited) or' do anything useful?
+ if ( $node->get_attribute("speciation") ) { # Saturate the predominant color component(s)
+ my $max = List::Util::max(@hcolor);
+ foreach (@hcolor) {
+ $_ = ( $_ >= $max ) ? 255 : $_ * 0.6;
+ }
+
+ $node_color = $image->colorResolve(@hcolor); # color of speciation nodes (bright red).
+ } else {
+ $node_color = $color; # color of rest of hilited subtree (dull red).
+ }
+ }
+
+ if (0) { # is this needed for anything???
+ if ( $node->is_root() ) {
+ my @line_color = $node->get_line_color();
+
+ # print STDERR "line color: ", join(";", @line_color), "\n";
+ $line_color = $image->colorResolve(@line_color)
+ if ( @line_color == 3 );
+ my $model = $node->get_attribute("model");
+ if ($model) {
+ my $color_array = $self->get_model_color($model);
+ $line_color = $image->colorResolve(@$color_array)
+ if ( ref($color_array) eq "ARRAY" );
+ }
+ $line_color = $color if $node->get_hilited();
+ $self->connect_nodes( undef, $node, $image, $line_color ); # what does this do (with undef as arg)?
+ }
+ }
+
+ unless ( $node->get_hidden() ) { # (don't draw children of hidden nodes)
+ foreach my $c (@children) {
+ my @line_color = $c->get_line_color();
+
+ # @line_color = (0,200,0);
+ $line_color = $image->colorResolve(@line_color)
+ if ( @line_color == 3 );
+ my $model = $c->get_attribute("model");
+ if ($model) {
+ my $color_array = $self->get_model_color($model);
+ $line_color = $image->colorResolve(@$color_array)
+ if ( ref($color_array) eq "ARRAY" );
+ }
+ $line_color = $color if $node->get_hilited();
+
+ # print STDERR "line color: ", join(";", @line_color), "\n";
+ $self->connect_nodes( $node, $c, $image, $line_color );
+ $self->recursive_draw( $c, $image, $color, $print_all_labels );
+ }
+ }
+
+ # image is circle for leaf, square otherwise:
+ if ( !( $node->is_leaf() || ( $node->is_hidden() ) ) ) {
+ $image->filledRectangle(
+ ( $node->get_X() - 2 ),
+ ( $node->get_Y() - 2 ),
+ ( $node->get_X() + 2 ),
+ ( $node->get_Y + 2 ),
+ $node_color
+ );
+ } elsif ( $node->is_leaf() ) {
+ $image->filledArc( ( $node->get_X() ), ( $node->get_Y() ), 7, 7, 0, 360, $node_color );
+ }
+
+ # if the node is hidden, draw the hidden symbol
+ #
+ if ( $node->get_hidden() ) {
+ $image->rectangle(
+ ( $node->get_X() - 3 ),
+ ( $node->get_Y() - 3 ),
+ ( $node->get_X() + 3 ),
+ ( $node->get_Y() + 3 ),
+ $color
+ );
+
+ $image->line( ( $node->get_X() ), ( $node->get_Y() - 2 ), ( $node->get_X() ), ( $node->get_Y() + 2 ), $color );
+ $image->line( ( $node->get_X() - 2 ), ( $node->get_Y() ), ( $node->get_X() + 2 ), ( $node->get_Y() ), $color );
+ }
+
+ # draw the label if it it visible
+ #
+ if ( ( !$node->get_hide_label() and $node->is_leaf() )
+ || $print_all_labels )
+ {
+ $node->get_label()->set_reference_point( ( $node->get_X() ), ( $node->get_Y() ) );
+ if ( !$node->is_leaf() && $node->get_children() % 2 > 0 ) {
+ $node->get_label()->set_reference_point(
+ ( $node->get_X() ),
+ (
+ $node->get_Y() - 1 - $node->get_label()->get_font()->height / 2
+ )
+ );
+ }
+ $node->get_label()->set_orientation_horizontal();
+ $node->get_label()->render($image);
+ }
+}
+
+=head2 function connect_nodes()
+
+ Synopsis:
+ Arguments: a parent node object, a child node object, a GD image object and a GD color
+ Returns:
+ Side effects: draws the connection between a parent and a child node.
+ Description: the default style is a connection that is broken into perpendicular
+ lines. This can be overridden in subclasses to draw different
+ styles of trees
+
+=cut
+
+sub connect_nodes {
+ my $self = shift;
+ my $p_node = shift;
+ my $c_node = shift;
+ my $image = shift;
+ my $color = shift;
+
+ if ( !$p_node ) {
+ $p_node = $c_node->copy();
+ $p_node->set_X( $self->get_tree()->get_layout()->get_left_margin() );
+ }
+ $self->display_branch_length( $image, $p_node, $c_node, $color );
+ $image->setAntiAliased($color);
+ $image->line( $p_node->get_X(), $p_node->get_Y(), $p_node->get_X(), $c_node->get_Y(), $color );
+
+ $image->line( $p_node->get_X(), $c_node->get_Y(), $c_node->get_X(), $c_node->get_Y(), $color );
+
+}
+
+sub display_branch_length {
+ my $self = shift;
+
+ # if we are supposed to display the branch lengths, we do so...
+ return unless $self->get_show_branch_length();
+
+ my $image = shift;
+ my $p_node = shift;
+ my $c_node = shift;
+ my $color = shift;
+
+ my $label_text = $c_node->get_branch_length();
+ my $branch_length_label = CXGN::Phylo::Label->new($label_text);
+
+ my @color = ( 0, 0, 0 ); #default color for branch length text.
+ my $max_blc_sum = 200
+ ; # the branch length color will be the branch color normalized s.t. sum of r,g and b <= max_blc_sum; so make this smaller if branch lengths are too light to read easily
+ if ($color) {
+ @color = $image->rgb($color);
+ my $blc_sum = ( $color[0] + $color[1] + $color[2] );
+ my $norm_factor = ( $blc_sum == 0 ) ? 1 : $max_blc_sum / $blc_sum;
+ $norm_factor = 1
+ if ( $norm_factor > 1 ); # can make it darker but not lighter
+ @color = map { $_ * $norm_factor } @color; # normalize branch color.
+
+ my $model = $c_node->get_attribute("model");
+ if ( $model =~ /\w+/ ) {
+ $branch_length_label->set_tooltip( "Calculated by " . ucfirst($model) . " model" );
+ }
+ }
+
+ $branch_length_label->set_reference_point( $self->get_blen_ref_pt( $p_node, $c_node, $label_text ) );
+ $branch_length_label->set_font( GD::Font->Tiny() );
+ $branch_length_label->set_hidden(0);
+ $branch_length_label->align_center();
+ $branch_length_label->set_text_color(@color);
+ $branch_length_label->render($image);
+
+ push( @{ $self->{bl_labels} }, $branch_length_label );
+}
+
+sub get_blen_ref_pt {
+ my $self = shift;
+ my $p_node = shift;
+ my $c_node = shift;
+ my $bl_scaled =
+ ( $c_node->get_transformed_branch_length( $self->get_tree()->get_shown_branch_length_transformation() ) *
+ $self->get_tree()->get_layout()->get_horizontal_scaling_factor() );
+ return ( $c_node->get_X() - $bl_scaled / 2, $c_node->get_Y() - $self->get_font_height() / 2 );
+}
+
+sub hilite_model {
+ my $self = shift;
+ my $model = shift;
+ my $color_array = shift;
+ $self->{model_colors}->{$model} = $color_array;
+}
+
+sub get_model_color {
+ my $self = shift;
+ my $model = shift;
+ my $color_array = $self->{model_colors}->{$model};
+ return unless ( ref($color_array) eq "ARRAY" );
+ return $color_array if ( @$color_array == 3 );
+}
+
+=head2 function get_html_image_map()
+
+ Synopsis: my $image_map=$renderer->get_html_image_map($name, $temp_filename, $hilite_temp, $align_type );
+ Arguments: image_map name, filename
+ Returns: a html imagemap that can be used to embed links into the tree image.
+ Side effects:
+ Description: use $node->set_link($url) to set the link to $url.
+
+=cut
+
+sub get_html_image_map {
+
+ # print STDERR "PNG_tree_renderer: in: get_html_image_map()\n";
+ my $self = shift;
+ my $name = shift;
+ my ( $whole_temp, $hilite_temp, $align_type ) = @_;
+ my $map = $self->recursive_image_map_coords( $self->get_tree()->get_root() );
+ my $tree = $self->get_tree();
+
+ #Image Map portion for Alignment, if align temp file provided:
+ if ( $tree->get_alignment && $whole_temp ) {
+ $hilite_temp = $whole_temp unless $hilite_temp;
+ foreach my $l ( $tree->get_leaf_list ) {
+ my $m = $l->get_alignment_member();
+ next unless $m;
+ my $temp = "";
+ my $title = "";
+ ( $l->get_hilited )
+ ? ( $temp = $hilite_temp )
+ : ( $temp = $whole_temp );
+ ( $l->get_hilited )
+ ? ( $title = "See alignment for highlighted region only" )
+ : ( $title = "See alignment for all visible members" );
+
+ ($temp) = $temp =~ /([^\/]+)$/;
+ my $coords = join ",", ( $m->get_enclosing_rect );
+ my $url =
+ "/tools/align_viewer/show_align.pl?temp_file=$temp&type=$align_type&title=Selection%20From%20Tree";
+ my $target = "_SGN_ALIGN_" . int( rand(999999999) );
+ $map .= "\n";
+ }
+ }
+ foreach my $bl_label ( @{ $self->{bl_labels} } ) {
+ next unless $bl_label->get_tooltip();
+ my $coords = join ",", ( $bl_label->get_enclosing_rect );
+
+ #print STDERR "--------RENDERED.PM------". $bl_label->get_onmouseover()."------";
+ $map .= "\n";
+ }
+
+ my $maptag = qq( \n );
+ return $maptag;
+}
+
+sub recursive_image_map_coords {
+ my $self = shift;
+ my $node = shift;
+
+ my $tooltip = $node->get_tooltip();
+ $tooltip ||= "Node " . $node->get_node_key() . ": " . $node->get_label()->get_name();
+
+ #print STDERR "RECURSIVE_IMAGE_MAP_COORDS\n";
+ my $map = do {
+ no warnings 'uninitialized';
+ "get_name()
+ . "\" coords=\""
+ . int( ( $node->get_X() ) - 3 ) . ","
+ . int( ( $node->get_Y() ) - 3 ) . ","
+ . int( ( $node->get_X() ) + 3 ) . ","
+ . int( ( $node->get_Y() ) + 3 )
+ . "\" href=\""
+ . $node->get_link()
+ . "\" title=\"$tooltip\" onmouseover=\""
+ . $node->get_onmouseover()
+ . "\" onmouseout=\""
+ . $node->get_onmouseout()
+ . "\" alt=\"$tooltip\" />\n";
+ };
+
+ $map .= $node->get_label()->get_html_image_map();
+ my @children = $node->get_children();
+ foreach my $c (@children) {
+
+ #print STDERR "checking out the children...\n";
+ $map .= $self->recursive_image_map_coords($c);
+ }
+ return $map;
+}
+
+package CXGN::Phylo::PNG_angle_tree_renderer;
+
+use GD;
+
+use base qw/ CXGN::Phylo::PNG_tree_renderer /;
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ return $self;
+}
+
+sub connect_nodes {
+ my $self = shift;
+ my $p_node = shift;
+ my $c_node = shift;
+ my $image = shift;
+ my $color = shift;
+
+ if ( !$p_node ) {
+ $p_node = $c_node->copy();
+ $p_node->set_X( $self->get_tree()->get_layout()->get_left_margin() );
+ }
+ $self->display_branch_length( $image, $p_node, $c_node, $color );
+ $image->setAntiAliased($color);
+ $image->line( $p_node->get_X(), $p_node->get_Y(), $c_node->get_X(), $c_node->get_Y(), gdAntiAliased );
+
+}
+
+sub get_blen_ref_pt {
+ my ( $self, $p_node, $c_node, $text ) = @_;
+
+ my ( $dx, $dy ) = ( $c_node->get_X() - $p_node->get_X(), abs( $c_node->get_Y() - $p_node->get_Y() ) );
+ my $angle = atan2( $dy, $dx );
+ my $pi = 3.14159265359;
+ my $max = $pi / 2;
+ my $halfw = ( length($text) * $self->get_font()->width ) / 3;
+ my $x_shift = -$halfw * ($angle) / $max;
+
+ my $flip = 1;
+ $flip = -1 if ( $c_node->get_Y() > $p_node->get_Y() );
+
+ my $bl_scaled =
+ ( $c_node->get_transformed_branch_length( $self->get_tree()->get_shown_branch_length_transformation() ) *
+ $self->get_tree()->get_layout()->get_horizontal_scaling_factor() );
+
+ return ( $c_node->get_X() - $bl_scaled / 2 + $x_shift,
+ ( $c_node->get_Y() + $p_node->get_Y() ) / 2 - $flip * $self->get_font_height() / 2 );
+}
+
+package CXGN::Phylo::PNG_round_tree_renderer;
+use GD;
+use base qw/ CXGN::Phylo::PNG_tree_renderer /;
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ return $self;
+}
+
+sub connect_nodes {
+ my $self = shift;
+ my $p_node = shift;
+ my $c_node = shift;
+ my $image = shift;
+ my $color = shift;
+
+ if ( !$p_node ) {
+ $p_node = $c_node->copy();
+ $p_node->set_X( $self->get_tree()->get_layout()->get_left_margin() );
+ }
+
+ $self->display_branch_length( $image, $p_node, $c_node, $color );
+
+ $image->setAntiAliased($color);
+ $image->arc( $c_node->get_X(), $p_node->get_Y(),
+ ( $c_node->get_X() - $p_node->get_X() ) * 2,
+ ( $p_node->get_Y() - $c_node->get_Y() ) * 2,
+ 180, 270, gdAntiAliased );
+}
+
+sub get_blen_ref_pt {
+ my ( $self, $p_node, $c_node, $text ) = @_;
+ my ( $dx, $dy ) = ( $c_node->get_X() - $p_node->get_X(), abs( $c_node->get_Y() - $p_node->get_Y() ) );
+ my $angle = atan2( $dy, $dx );
+ my $pi = 3.14159265359;
+ my $max = $pi / 2;
+ my $halfw = ( length($text) * $self->get_font()->width ) / 2;
+ my $x_shift = -$halfw * ($angle) / $max;
+
+ my $flip = 1;
+ $flip = -1 if ( $c_node->get_Y() > $p_node->get_Y() );
+
+ #Try putting at "center of mass" of right triangle:
+ return ( $p_node->get_X() + ( $c_node->get_X() - $p_node->get_X() ) / 3 + $x_shift,
+ $c_node->get_Y() - ( $c_node->get_Y() - $p_node->get_Y() ) / 3 - $flip * ( $self->get_font()->height() / 2 ) );
+}
+
+1;
diff --git a/lib/CXGN/Phylo/Species_name_map.pm b/lib/CXGN/Phylo/Species_name_map.pm
index a4ccd81e..248ad490 100644
--- a/lib/CXGN/Phylo/Species_name_map.pm
+++ b/lib/CXGN/Phylo/Species_name_map.pm
@@ -90,7 +90,7 @@ sub new{
'Arabidopsis_lyrata',
'Arachis_hypogaea',
'Cajanus_cajan',
- 'Lotus_japonica',
+ 'Lotus_japonicus',
'Cannabis_sativa',
'Malus_domestica',
'Prunus_persica',
@@ -151,7 +151,7 @@ sub new{
$self->set_standard_name('peach', 'Prunus_persica');
$self->set_standard_name('peanut', 'Arachis_hypogaea');
$self->set_standard_name('pigeon_pea', 'Cajanus_cajan');
- $self->set_standard_name('lotus', 'Lotus_japonica');
+ $self->set_standard_name('lotus', 'Lotus_japonicus');
$self->set_standard_name('apple', 'Malus_domestica');
$self->set_standard_name('cannabis', 'Cannabis_sativa');
$self->set_standard_name('hemp', 'Cannabis_sativa');
diff --git a/lib/CXGN/Phylo/TLYstuff/utils.pm b/lib/CXGN/Phylo/TLYstuff/utils.pm
deleted file mode 100644
index 0ccfeca7..00000000
--- a/lib/CXGN/Phylo/TLYstuff/utils.pm
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-package Utils;
-
-# read fasta from file with file handle $filehandle
-# # read $max_sequences sequences at concatenate to $chunk_sequence
-# # unless hit eof
-# # return the chunk sequence, first line of next chunk (which is undef if no next chunk)
-# chunk_sequences keeps the newlines as in input
-
-
-sub get_next_chunk{
- my $filehandle = shift;
- my $max_sequences = shift;
- my $chunk_sequence = shift;
- $chunk_sequence ||= "";
- my $seqs_in_chunk = ($chunk_sequence eq "")? 0: 1;
-
- while (<$filehandle>) {
- if (/^\s*>/) { # next id line
- $seqs_in_chunk++;
- if ($seqs_in_chunk > $max_sequences) { # this id belongs to the next chunk
- return ($chunk_sequence, $_);
- }
- }
- $chunk_sequence .= $_;
- }
- return ($chunk_sequence, undef);
-}
-
-
-
-1;
-
diff --git a/lib/CXGN/Phylo/Tree.pm b/lib/CXGN/Phylo/Tree.pm
index 124a6354..b044d22e 100644
--- a/lib/CXGN/Phylo/Tree.pm
+++ b/lib/CXGN/Phylo/Tree.pm
@@ -34,11 +34,14 @@ The tree object also provides the layout and rendering functions. The both layou
use strict;
use warnings;
+use Math::BigInt;
+
use CXGN::Phylo::Node;
use CXGN::Phylo::Species_name_map;
use CXGN::Phylo::Layout;
use CXGN::Phylo::Renderer;
use CXGN::Phylo::Parser;
+use CXGN::Phylo::IdTaxonMap;
use base qw | CXGN::DB::Object |;
@@ -55,116 +58,122 @@ my $do_parse_set_error = 0;
=cut
sub new {
- my $class = shift;
- my $self = bless {}, $class;
- #You can feed constructor with a newick string, which will create
- #a parser object that creates a tree object *without* passing a
- #string, which would lead to an infinite loop. Watch out!
- my $arg = shift;
- my $newick_string = "";
- unless (ref($arg)) {
- # print STDERR "Tree::new. [$newick_string] \n";
- $newick_string = $arg;
-# print STDERR "Tree::new. [$newick_string] \n";
- } else {
- my $newick_file = '';
- if ($arg->{from_files}) {
- $newick_file = $arg->{from_files}->{newick};
- die "Need a newick file if 'from_files' is used\n" unless -f $newick_file;
-
- $self = _tree_from_file($newick_file);
- my $alignment_file = $arg->{from_files}->{alignment};
- if ($alignment_file) {
- die "Alignment file: $alignment_file not found" unless -f $alignment_file;
- my $alignment = CXGN::Phylo::Alignment->new( from_file=>$alignment_file);
- $self->set_alignment($alignment);
- $self->standard_alignment_leaf_association();
- }
- return $self;
- } elsif ($arg->{from_file}) {
- $newick_file = $arg->{from_file};
- $self = _tree_from_file($newick_file);
- return $self;
- }
- }
- if ($newick_string) {
- $newick_string =~ s/\s//g;
- $newick_string =~ s/\n|\r//sg;
- if ($newick_string =~ /^\(.*\)|;$/) { # start with (, end with ) or ;
- # print STDERR "in Tree::new, about to parse the newick_string \n";
- my $parser = CXGN::Phylo::Parse_newick->new($newick_string, $do_parse_set_error);
- print "calling parse in Tree constructor\n";
- my $self = $parser->parse();
- return $self;
- } elsif ($newick_string) {
- print STDERR "String passed not recognized as newick\n";
- }
- }
-
- ##############################################################
- #$self is a new tree, not predefined by newick; instead it will be
- #constructed by methods on this object and Phylo::Node's
-
- #print STDERR "constructing Tree not predefined by a newick\n";
-
- $self->set_unique_node_key(0);
-
- # initialize the root node
- #
- my $root = CXGN::Phylo::Node->new();
- $root->set_name(".");
- $root->set_tree($self);
- $root->set_node_key($self->get_unique_node_key());
- $self->add_node_hash($root, $root->get_node_key());
- $self->set_root($root);
-
- # initialize some imaging parameters
- #
- $self->set_show_labels(1);
- $self->set_hilite_color(255, 0 ,0);
- $self->set_line_color(100, 100, 100);
- $self->set_bgcolor(0, 0, 0);
- $self->set_show_species_in_label(0);
- $self->set_show_standard_species(0);
- $self->set_species_standardizer(CXGN::Phylo::Species_name_map->new());
-
- #Attribute names to show in newick extended format
- $self->{newick_shown_attributes} = {};
- $self->{shown_branch_length_transformation} = "branch_length"; # other possibilities: "proportion_different", equal
- $self->{min_shown_branch_length} = 0.001; # when showing branches graphically, this is added to the displayed length
- $self->set_min_branch_length(0.0001);
- # initialize a default layout and renderer
- #
- $self->set_layout( CXGN::Phylo::Layout->new($self) );
- $self->set_renderer( CXGN::Phylo::PNG_tree_renderer->new($self) );
-
- return $self;
+ my $class = shift;
+ my $self = bless {}, $class;
+
+ #You can feed constructor with a newick string, which will create
+ #a parser object that creates a tree object *without* passing a
+ #string, which would lead to an infinite loop. Watch out!
+ my $arg = shift;
+ my $newick_string = "";
+ unless ( ref($arg) ) {
+
+ # print STDERR "Tree::new. [$newick_string] \n";
+ $newick_string = $arg;
+
+ # print STDERR "Tree::new. [$newick_string] \n";
+ } else {
+ my $newick_file = '';
+ if ( $arg->{from_files} ) {
+ $newick_file = $arg->{from_files}->{newick};
+ die "Need a newick file if 'from_files' is used\n" unless -f $newick_file;
+
+ $self = _tree_from_file($newick_file);
+ my $alignment_file = $arg->{from_files}->{alignment};
+ if ($alignment_file) {
+ die "Alignment file: $alignment_file not found" unless -f $alignment_file;
+ my $alignment = CXGN::Phylo::Alignment->new( from_file => $alignment_file );
+ $self->set_alignment($alignment);
+ $self->standard_alignment_leaf_association();
+ }
+ return $self;
+ } elsif ( $arg->{from_file} ) {
+ $newick_file = $arg->{from_file};
+ $self = _tree_from_file($newick_file);
+ return $self;
+ }
+ }
+ if ($newick_string) {
+ $newick_string =~ s/\s//g;
+ $newick_string =~ s/\n|\r//sg;
+ if ( $newick_string =~ /^\(.*\)|;$/ ) { # start with (, end with ) or ;
+ # print STDERR "in Tree::new, about to parse the newick_string \n";
+ my $parser = CXGN::Phylo::Parse_newick->new( $newick_string, $do_parse_set_error );
+ print "calling parse in Tree constructor\n";
+ my $self = $parser->parse();
+ return $self;
+ } elsif ($newick_string) {
+ print STDERR "String passed not recognized as newick\n";
+ }
+ }
+
+ ##############################################################
+ #$self is a new tree, not predefined by newick; instead it will be
+ #constructed by methods on this object and Phylo::Node's
+
+ #print STDERR "constructing Tree not predefined by a newick\n";
+
+ $self->set_unique_node_key(0);
+
+ # initialize the root node
+ #
+ my $root = CXGN::Phylo::Node->new();
+ $root->set_name(".");
+ $root->set_tree($self);
+ $root->set_node_key( $self->get_unique_node_key() );
+ $self->add_node_hash( $root, $root->get_node_key() );
+ $self->set_root($root);
+
+ # initialize some imaging parameters
+ #
+ $self->set_show_labels(1);
+ $self->set_hilite_color( 255, 0, 0 );
+ $self->set_line_color( 100, 100, 100 );
+ $self->set_bgcolor( 0, 0, 0 );
+ $self->set_show_species_in_label(0);
+ $self->set_show_standard_species(0);
+ $self->set_species_standardizer( CXGN::Phylo::Species_name_map->new() );
+ $self->set_id_taxon_map( CXGN::Phylo::IdTaxonMap->new() );
+
+ #Attribute names to show in newick extended format
+ $self->{newick_shown_attributes} = {};
+ $self->{shown_branch_length_transformation} = "branch_length"; # other possibilities: "proportion_different", equal
+ $self->{min_shown_branch_length} = 0.001; # when showing branches graphically, this is added to the displayed length
+ $self->set_min_branch_length(0.0001);
+
+ # initialize a default layout and renderer
+ #
+ $self->set_layout( CXGN::Phylo::Layout->new($self) );
+ $self->set_renderer( CXGN::Phylo::PNG_tree_renderer->new($self) );
+
+ return $self;
}
# copy some of the tree's fields. Other fields will just have default values as set in constructor
# e.g. layout and renderer aren't copied because there is no copy method for these objects
-sub copy_tree_fields{
- my $self = shift; # source
- my $new = shift; # copy
+sub copy_tree_fields {
+ my $self = shift; # source
+ my $new = shift; # copy
- $new->set_name($self->get_name());
+ $new->set_name( $self->get_name() );
- # initialize some imaging parameters
- #
- $new->set_show_labels($self->get_show_labels());
- $new->set_hilite_color($self->get_hilite_color());
- $new->set_line_color($self->get_line_color());
- $new->set_bgcolor($self->set_bgcolor());
+ # initialize some imaging parameters
+ #
+ $new->set_show_labels( $self->get_show_labels() );
+ $new->set_hilite_color( $self->get_hilite_color() );
+ $new->set_line_color( $self->get_line_color() );
+ $new->set_bgcolor( $self->set_bgcolor() );
- $new->set_species_standardizer($self->get_species_standardizer()->copy()) if (defined $self->get_species_standardizer()) ;
- $new->set_show_species_in_label($self->get_show_species_in_label());
- $new->set_show_standard_species($self->get_show_standard_species());
+ $new->set_species_standardizer( $self->get_species_standardizer()->copy() )
+ if ( defined $self->get_species_standardizer() );
+ $new->set_show_species_in_label( $self->get_show_species_in_label() );
+ $new->set_show_standard_species( $self->get_show_standard_species() );
+
+ #Attribute names to show in newick extended format
+ # $new->{newick_shown_attributes} = $self->{newick_shown_attributes};
+ # @{$new->{newick_shown_attributes}} = @{$self->{newick_shown_attributes}};
+ %{ $new->{newick_shown_attributes} } = %{ $self->{newick_shown_attributes} };
- #Attribute names to show in newick extended format
-# $new->{newick_shown_attributes} = $self->{newick_shown_attributes};
- # @{$new->{newick_shown_attributes}} = @{$self->{newick_shown_attributes}};
- %{$new->{newick_shown_attributes}} = %{$self->{newick_shown_attributes}};
-
}
=head2 function copy()
@@ -178,60 +187,58 @@ sub copy_tree_fields{
=cut
sub copy {
- my $self = shift;
- my $new = $self->get_root()->copy_subtree();
- $new->update_label_names();
- return $new;
+ my $self = shift;
+ my $new = $self->get_root()->copy_subtree();
+ $new->update_label_names();
+ return $new;
}
-
sub _tree_from_file {
- my $file = shift;
- my $tree = _tree_from_newick(_newick_from_file($file));
- $tree->standard_layout();
- return $tree;
+ my $file = shift;
+ my $tree = _tree_from_newick( _newick_from_file($file) );
+ $tree->standard_layout();
+ return $tree;
}
sub _tree_from_newick {
- my $newick_string = shift;
- $newick_string =~ s/\s//g; # remove whitespace from newick string
- $newick_string =~ s/\n|\r//g;
- if($newick_string =~ /^\(.*\)|;$/){
- my $parser = CXGN::Phylo::Parse_newick->new($newick_string, $do_parse_set_error);
- print "parsing tree in Tree::_tree_from_newick\n";
- my $tree = $parser->parse();
- return $tree;
- }
- elsif($newick_string) {
- print STDERR "String passed not recognized as newick\n";
- return undef;
- }
+ my $newick_string = shift;
+ $newick_string =~ s/\s//g; # remove whitespace from newick string
+ $newick_string =~ s/\n|\r//g;
+ if ( $newick_string =~ /^\(.*\)|;$/ ) {
+ my $parser = CXGN::Phylo::Parse_newick->new( $newick_string, $do_parse_set_error );
+ print "parsing tree in Tree::_tree_from_newick\n";
+ my $tree = $parser->parse();
+ return $tree;
+ } elsif ($newick_string) {
+ print STDERR "String passed not recognized as newick\n";
+ return undef;
+ }
}
sub _newick_from_file {
- my $file = shift;
- open(FH, $file) or die "Can't open file: $file\n";
- my $newick = "";
- $newick .= $_ while ();
- close FH;
- $newick =~ s/\n//sg;
- $newick =~ s/\r//sg;
- $newick =~ s/\s//g;
- return $newick;
+ my $file = shift;
+ open( FH, $file ) or die "Can't open file: $file\n";
+ my $newick = "";
+ $newick .= $_ while ();
+ close FH;
+ $newick =~ s/\n//sg;
+ $newick =~ s/\r//sg;
+ $newick =~ s/\s//g;
+ return $newick;
}
sub get_alignment {
- my $self = shift;
- return $self->{alignment};
+ my $self = shift;
+ return $self->{alignment};
}
sub set_alignment {
- my $self = shift;
- $self->{alignment} = shift;
- unless(@{$self->{alignment}->{members}}){
- warn "The alignment set to the tree has no members. You must construct the alignment before setting it here";
- return -1;
- }
+ my $self = shift;
+ $self->{alignment} = shift;
+ unless ( @{ $self->{alignment}->{members} } ) {
+ warn "The alignment set to the tree has no members. You must construct the alignment before setting it here";
+ return -1;
+ }
}
=head2 function standard_alignment_leaf_association()
@@ -242,18 +249,18 @@ sub set_alignment {
=cut
sub standard_alignment_leaf_association {
- my $self = shift;
- my $alignment = $self->get_alignment();
- return unless $alignment;
- my %id2mem = ();
- foreach my $m ($alignment->get_members()) {
- $id2mem{$m->get_id()} = $m;
- }
- foreach my $l ($self->get_leaves()) {
- my $m = $id2mem{$l->get_name()};
- next unless $m;
- $l->set_alignment_member($m);
- }
+ my $self = shift;
+ my $alignment = $self->get_alignment();
+ return unless $alignment;
+ my %id2mem = ();
+ foreach my $m ( $alignment->get_members() ) {
+ $id2mem{ $m->get_id() } = $m;
+ }
+ foreach my $l ( $self->get_leaves() ) {
+ my $m = $id2mem{ $l->get_name() };
+ next unless $m;
+ $l->set_alignment_member($m);
+ }
}
=head2 function get_root()
@@ -267,8 +274,8 @@ sub standard_alignment_leaf_association {
=cut
-sub get_root {
- my $self=shift;
+sub get_root {
+ my $self = shift;
return $self->{root};
}
@@ -291,11 +298,11 @@ sub get_root {
=cut
sub set_root {
- my $self=shift;
- my $new_root=shift;
- $new_root->set_parent(undef); #is_root must be true
+ my $self = shift;
+ my $new_root = shift;
+ $new_root->set_parent(undef); #is_root must be true
$new_root->set_branch_length(undef);
- $self->{root}=$new_root;
+ $self->{root} = $new_root;
}
=head2 function delete_node() and del_node()
@@ -314,26 +321,27 @@ sub set_root {
=cut
sub delete_node {
- my $self = shift;
- my $node_key = shift;
+ my $self = shift;
+ my $node_key = shift;
- # get the node object from the key
- #
- my $node=$self->get_node($node_key);
- return $self->del_node($node);
+ # get the node object from the key
+ #
+ my $node = $self->get_node($node_key);
+ return $self->del_node($node);
}
# delete node by passing node object as argument
# rather than node key as with delete_node
-sub del_node{
- my $self = shift;
- my $node = shift;
- if (!$node) {
- warn 'The node you want to delete does not exist!'; return;
- }
- my $retval = $node->delete_self();
- $self->recalculate_tree_data();
- return $retval;
+sub del_node {
+ my $self = shift;
+ my $node = shift;
+ if ( !$node ) {
+ warn 'The node you want to delete does not exist!';
+ return;
+ }
+ my $retval = $node->delete_self();
+ $self->recalculate_tree_data();
+ return $retval;
}
=head2 function recalculate_tree_data()
@@ -347,11 +355,11 @@ sub del_node{
=cut
-sub recalculate_tree_data {
+sub recalculate_tree_data {
my $self = shift;
$self->calculate_leaf_list();
$self->clear_node_hash();
- $self->regenerate_node_hash($self->get_root());
+ $self->regenerate_node_hash( $self->get_root() );
$self->get_root()->calculate_distances_from_root();
$self->get_root()->recursive_clear_properties();
}
@@ -368,8 +376,8 @@ sub recalculate_tree_data {
=cut
-sub prune_to_subtree {
- my $self = shift;
+sub prune_to_subtree {
+ my $self = shift;
my $new_root_node = shift;
$self->set_root($new_root_node);
@@ -382,7 +390,7 @@ sub prune_to_subtree {
#=cut
-#sub sub_branch {
+#sub sub_branch {
# if (0) {
# my $self = shift;
# my $new_root_node = shift;
@@ -394,7 +402,6 @@ sub prune_to_subtree {
# }
#}
-
=head2 function reset_root()
Synopsis: $tree->reset_root($node);
@@ -410,41 +417,42 @@ sub prune_to_subtree {
=cut
sub reset_root {
- my $self = shift; # tree object
- my $new_root_node = shift; # node object
-
- if (0) { #either of these branches should work.
- my @parents = $new_root_node->get_all_parents(); # parent, grandparent, etc. up to & including root
- $new_root_node->set_parent(undef); # because it is to be the root
- my $pc_blen = $new_root_node->get_branch_length(); # branch length between $pc and $cp
- my $cp=$new_root_node;
- foreach my $pc (@parents) {
- my $former_p_blen = $pc->get_branch_length();
- $pc->remove_child($cp); # removes $cp from $pc's child list
- $cp->add_child_node($pc); # adds $pc as child of $cp, and set $pc's parent to $cp
- $pc->set_branch_length($pc_blen);
- $cp = $pc;
- $pc_blen = $former_p_blen;
- }
- } else {
- my @parents_root_down = reverse $new_root_node->get_all_parents();
- push @parents_root_down, $new_root_node; # need to include the new root in the array
- my $pc = shift @parents_root_down; # pc means goes from being parent to being child
-
- for (my $cp = shift @parents_root_down; defined $cp; $cp = shift @parents_root_down) {
- my $blen = $cp->get_branch_length();
- $pc->remove_child($cp); # remove $cp from children list of $pc
- $cp->set_parent(undef);
- $cp->add_child_node($pc); # now $cp is parent, $pc the child
- $pc->set_branch_length($blen);
- $pc = $cp;
- # at this point we still have a consistent tree, but with the root moved another step along the
- # path from original root to new root.
- }
- }
- $self->set_root($new_root_node);
- $new_root_node->set_branch_length(0);
- $self->recalculate_tree_data();
+ my $self = shift; # tree object
+ my $new_root_node = shift; # node object
+
+ if (0) { #either of these branches should work.
+ my @parents = $new_root_node->get_all_parents(); # parent, grandparent, etc. up to & including root
+ $new_root_node->set_parent(undef); # because it is to be the root
+ my $pc_blen = $new_root_node->get_branch_length(); # branch length between $pc and $cp
+ my $cp = $new_root_node;
+ foreach my $pc (@parents) {
+ my $former_p_blen = $pc->get_branch_length();
+ $pc->remove_child($cp); # removes $cp from $pc's child list
+ $cp->add_child_node($pc); # adds $pc as child of $cp, and set $pc's parent to $cp
+ $pc->set_branch_length($pc_blen);
+ $cp = $pc;
+ $pc_blen = $former_p_blen;
+ }
+ } else {
+ my @parents_root_down = reverse $new_root_node->get_all_parents();
+ push @parents_root_down, $new_root_node; # need to include the new root in the array
+ my $pc = shift @parents_root_down; # pc means goes from being parent to being child
+
+ for ( my $cp = shift @parents_root_down ; defined $cp ; $cp = shift @parents_root_down ) {
+ my $blen = $cp->get_branch_length();
+ $pc->remove_child($cp); # remove $cp from children list of $pc
+ $cp->set_parent(undef);
+ $cp->add_child_node($pc); # now $cp is parent, $pc the child
+ $pc->set_branch_length($blen);
+ $pc = $cp;
+
+ # at this point we still have a consistent tree, but with the root moved another step along the
+ # path from original root to new root.
+ }
+ }
+ $self->set_root($new_root_node);
+ $new_root_node->set_branch_length(0);
+ $self->recalculate_tree_data();
}
=head2 function get_leaf_count()
@@ -457,10 +465,11 @@ sub reset_root {
=cut
-sub get_leaf_count {
+sub get_leaf_count {
my $self = shift;
-# $self->get_root()->count_leaves();
- return scalar $self->get_leaf_list();
+
+ # $self->get_root()->count_leaves();
+ return scalar $self->get_leaf_list();
}
=head2 function get_unhidden_leaf_count()
@@ -470,8 +479,8 @@ sub get_leaf_count {
=cut
sub get_unhidden_leaf_count {
- my $self = shift;
- return scalar grep { !$_->is_hidden } $self->get_leaf_list;
+ my $self = shift;
+ return scalar grep { !$_->is_hidden } $self->get_leaf_list;
}
=head2 function set_unique_node_key()
@@ -489,9 +498,9 @@ sub get_unhidden_leaf_count {
=cut
-sub set_unique_node_key {
- my $self = shift;
- $self->{unique_node_key}=shift;
+sub set_unique_node_key {
+ my $self = shift;
+ $self->{unique_node_key} = shift;
}
=head2 function get_unique_node_key()
@@ -509,13 +518,14 @@ sub set_unique_node_key {
=cut
-sub get_unique_node_key {
- my $self = shift;
- $self->{unique_node_key}++; # increment the unique node key
- while (exists $self->{node_hash}->{$self->{unique_node_key}}) { # if key already in node_hash, increment again...
- $self->{unique_node_key}++;
- }
- return $self->{unique_node_key};
+sub get_unique_node_key {
+ my $self = shift;
+ $self->{unique_node_key}++; # increment the unique node key
+ while ( exists $self->{node_hash}->{ $self->{unique_node_key} } )
+ { # if key already in node_hash, increment again...
+ $self->{unique_node_key}++;
+ }
+ return $self->{unique_node_key};
}
=head2 function clear_node_hash()
@@ -528,9 +538,9 @@ sub get_unique_node_key {
=cut
-sub clear_node_hash {
+sub clear_node_hash {
my $self = shift;
- %{$self->{node_hash}}=();
+ %{ $self->{node_hash} } = ();
}
=head2 function regenerate_node_hash()
@@ -544,18 +554,19 @@ sub clear_node_hash {
=cut
-sub regenerate_node_hash {
+sub regenerate_node_hash {
my $self = shift;
my $node = shift;
- $node ||= $self->get_root();
-#print("in regenerate_node_hash. \n");
-#$node->print_node();
-#print("node key: ", $node->get_node_key());
- $self->add_node_hash($node, $node->get_node_key());
- foreach my $c ($node->get_children()) {
- $self->regenerate_node_hash($c);
+ $node ||= $self->get_root();
+
+ #print("in regenerate_node_hash. \n");
+ #$node->print_node();
+ #print("node key: ", $node->get_node_key());
+ $self->add_node_hash( $node, $node->get_node_key() );
+ foreach my $c ( $node->get_children() ) {
+ $self->regenerate_node_hash($c);
}
- $self->set_unique_node_key( scalar $self->get_all_nodes() );
+ $self->set_unique_node_key( scalar $self->get_all_nodes() );
}
=head2 function add_node_hash()
@@ -574,12 +585,12 @@ sub regenerate_node_hash {
=cut
-sub add_node_hash {
- my $self = shift;
- my $node = shift;
+sub add_node_hash {
+ my $self = shift;
+ my $node = shift;
my $unique_key = shift;
- ${$self->{node_hash}}{$unique_key}=$node;
+ ${ $self->{node_hash} }{$unique_key} = $node;
}
=head2 function get_all_nodes()
@@ -594,19 +605,19 @@ sub add_node_hash {
sub get_all_nodes {
my $self = shift;
- return (values %{$self->{node_hash}});
+ return ( values %{ $self->{node_hash} } );
}
sub get_all_node_keys {
my $self = shift;
- return (keys %{$self->{node_hash}});
+ return ( keys %{ $self->{node_hash} } );
}
-sub get_node_count {
+sub get_node_count {
my $self = shift;
- return scalar($self->get_all_nodes());
+ return scalar( $self->get_all_nodes() );
}
=head2 function get_node()
@@ -623,28 +634,27 @@ sub get_node_count {
=cut
-sub get_node {
- my $self = shift;
- my $key = shift;
- return ${$self->{node_hash}}{$key};
-}
-
-sub print_node_keys{
- my $self = shift;
- my $hashref = $self->{node_hash};
- foreach my $k (keys (%$hashref)) {
- my $n = $self->get_node($k);
- if (defined $n) {
- print("key, node: ", $k); $n->print_node();
- } else {
- print("key: ", $k, " has undefined node (returned by get_node($k) ). \n");
- }
- }
-print("present value of unique_node_key: ", $self->{unique_node_key}, "\n");
+sub get_node {
+ my $self = shift;
+ my $key = shift;
+ return ${ $self->{node_hash} }{$key};
+}
+
+sub print_node_keys {
+ my $self = shift;
+ my $hashref = $self->{node_hash};
+ foreach my $k ( keys(%$hashref) ) {
+ my $n = $self->get_node($k);
+ if ( defined $n ) {
+ print( "key, node: ", $k );
+ $n->print_node();
+ } else {
+ print( "key: ", $k, " has undefined node (returned by get_node($k) ). \n" );
+ }
+ }
+ print( "present value of unique_node_key: ", $self->{unique_node_key}, "\n" );
}
-
-
=head2 function incorporate_nodes()
Given a list of nodes, add them to this tree's membership
@@ -657,14 +667,14 @@ print("present value of unique_node_key: ", $self->{unique_node_key}, "\n");
=cut
sub incorporate_nodes {
- my $self = shift;
- my @nodes = @_;
- foreach my $n (@nodes) {
- my $new_key = $self->get_unique_node_key();
- $n->set_tree($self);
- $n->set_node_key($new_key);
- $self->add_node_hash($n, $new_key);
- }
+ my $self = shift;
+ my @nodes = @_;
+ foreach my $n (@nodes) {
+ my $new_key = $self->get_unique_node_key();
+ $n->set_tree($self);
+ $n->set_node_key($new_key);
+ $self->add_node_hash( $n, $new_key );
+ }
}
=head2 function incorporate_tree()
@@ -674,10 +684,10 @@ Given a tree, incorporate that tree's nodes into this tree. This does not affec
=cut
sub incorporate_tree {
- my $self = shift;
- my $sub_tree = shift;
- my @nodes = $sub_tree->get_root()->get_descendents();
- $self->incorporate_nodes(@nodes);
+ my $self = shift;
+ my $sub_tree = shift;
+ my @nodes = $sub_tree->get_root()->get_descendents();
+ $self->incorporate_nodes(@nodes);
}
=head2 function make_binary()
@@ -687,16 +697,17 @@ Inserts joint nodes at polyphetic points so that the tree is biphetic or monophe
=cut
sub make_binary {
- my $self = shift;
- my $node = shift;
- $node ||= $self->get_root();
- my $new_bl = shift;
- $new_bl ||= $self->get_min_branch_length();
-# warn "new bl in make_binary: $new_bl \n";
- $node->binarify_children($new_bl);
- foreach($node->get_children()){
- $self->make_binary($_, $new_bl);
- }
+ my $self = shift;
+ my $node = shift;
+ $node ||= $self->get_root();
+ my $new_bl = shift;
+ $new_bl ||= $self->get_min_branch_length();
+
+ # warn "new bl in make_binary: $new_bl \n";
+ $node->binarify_children($new_bl);
+ foreach ( $node->get_children() ) {
+ $self->make_binary( $_, $new_bl );
+ }
}
=head2 function traverse()
@@ -706,95 +717,183 @@ sub make_binary {
Arguments: a function to be performed on each node, taking
that node as its only argument
Returns: nothing
- Side effects: the function will be executed on each node object.
- Description: not yet implemented... UPDATE: C. Carpita attempts
+ Side effects: The function will be executed on each node object
+ BEFORE descending into the node's child subtrees, i.e. preorder traversal.
+ Description: Synonym for preorder_traversal.
=cut
-sub traverse {
- my $self = shift;
- my $function = shift;
- my $node = shift;
- die "You did not pass a subroutine reference" unless (ref($function) eq "CODE");
- $node ||= $self->get_root();
+sub traverse {
+ my $self = shift;
+ my $function = shift;
+ my $node = shift;
+ die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" );
+ $node ||= $self->get_root();
+
+ &$function($node);
+ foreach ( $node->get_children() ) {
+ $self->traverse( $function, $_ );
+ }
+}
+
+=head2 function preorder_traversal()
+
+ Synopsis: $tree->preorder_traversal( sub{ my $node = shift;
+ $node->set_hidden() } );
+ Arguments: a function to be performed on each node, taking
+ that node as its only argument
+ Returns: nothing
+ Side effects: The function will be executed on each node object.
+ Description: Preorder traversal. Call function on node, then descend into its child subtrees.
+
+=cut
+
+sub preorder_traversal {
+ my $self = shift;
+ my $function = shift;
+ my $node = shift;
+ die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" );
+ $node ||= $self->get_root();
- &$function($node);
+ # print STDERR 'In traverse. before. ' . $node->get_name() . " ";
+ &$function($node);
- foreach( $node->get_children() ){
- $self->traverse($function, $_);
- }
+ # print STDERR "after. \n";
+ foreach ( $node->get_children() ) {
+ $self->preorder_traversal( $function, $_ );
+ }
}
-sub newick_shown_attributes { # just return the keys (attributes), so everything should work the same.
- my $self = shift;
- return keys %{$self->{newick_shown_attributes}};
+=head2 function inorder_traversal()
+
+ Synopsis: $tree->inorder_traversal( sub{ my $node = shift;
+ $node->set_hidden() } );
+ Arguments: a function to be performed on each node, taking
+ that node as its only argument
+ Returns: nothing
+ Side effects: inorder traversal. The function will be executed on each node object.
+ Description: inorder traversal. The function will be called on the node in between
+ descending to child subtrees. E.g. for binary tree, descend into left subtree,
+ call function on node, descend into right subtree.
+
+=cut
+
+sub inorder_traversal {
+ my $self = shift;
+ my $function = shift;
+ die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" );
+ my $node = shift || $self->get_root(); # node is by default the root of tree.
+
+ my @children = $node->get_children();
+ if ( scalar @children > 0 ) {
+ my $leftmost_node = shift @children;
+ $self->inorder_traversal( $function, $leftmost_node );
+ foreach my $child (@children) {
+ &$function($node);
+ $self->inorder_traversal( $function, $child );
+ }
+ } else {
+ &$function($node);
+ }
+}
+
+=head2 function postorder_traversal()
+
+ Synopsis: $tree->postorder_traversal( sub{ my $node = shift;
+ $node->set_hidden() } );
+ Arguments: a function to be performed on each node, taking
+ that node as its only argument
+ Returns: nothing
+ Side effects: The function will be executed on each node object.
+ Description: postorder traversal. Call function on node AFTER descending into subtrees.
+
+=cut
+
+sub postorder_traversal {
+ my $self = shift;
+ my $function = shift;
+ my $node = shift;
+ die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" );
+ $node ||= $self->get_root(); # node is by default the root of tree.
+
+ foreach ( $node->get_children() ) {
+ $self->postorder_traversal( $function, $_ );
+ }
+ &$function($node);
+}
+
+sub newick_shown_attributes { # just return list of keys (attributes), so everything should work the same.
+ my $self = shift;
+ return keys %{ $self->{newick_shown_attributes} };
}
sub show_newick_attribute {
- my $self = shift;
- my $attr = shift;
-# push(@{$self->{newick_shown_attributes}}, $attr);
-$self->{newick_shown_attributes}->{$attr}++;
+ my $self = shift;
+ my $attr = shift;
+
+ # push(@{$self->{newick_shown_attributes}}, $attr);
+ $self->{newick_shown_attributes}->{$attr}++;
}
sub unshow_newick_attribute {
- my $self = shift;
- my $attr = shift;
+ my $self = shift;
+ my $attr = shift;
- delete $self->{newick_shown_attributes}->{$attr};
+ delete $self->{newick_shown_attributes}->{$attr};
-# my $size = scalar @{$self->{newick_shown_attributes}};
-# foreach my $index (0..$size-1) {
-# if ( ($self->{newick_shown_attributes})->[$index] eq $attr) {
-# delete $self->{newick_shown_attributes}->[$index];
-# last;
-# }
-# }
+ # my $size = scalar @{$self->{newick_shown_attributes}};
+ # foreach my $index (0..$size-1) {
+ # if ( ($self->{newick_shown_attributes})->[$index] eq $attr) {
+ # delete $self->{newick_shown_attributes}->[$index];
+ # last;
+ # }
+ # }
}
-sub get_min_branch_length{
-my $self = shift;
-return $self->{min_branch_length};
+sub get_min_branch_length {
+ my $self = shift;
+ return $self->{min_branch_length};
}
-sub set_min_branch_length{
-my $self = shift;
-$self->{min_branch_length} = shift;
+sub set_min_branch_length {
+ my $self = shift;
+ $self->{min_branch_length} = shift;
}
-sub get_shown_branch_length_transformation{
-my $self = shift;
-return $self->{shown_branch_length_transformation};
+sub get_shown_branch_length_transformation {
+ my $self = shift;
+ return $self->{shown_branch_length_transformation};
}
-sub set_shown_branch_length_transformation{
-my $self = shift;
-$self->{shown_branch_length_transformation} = shift;
+sub set_shown_branch_length_transformation {
+ my $self = shift;
+ $self->{shown_branch_length_transformation} = shift;
}
-sub set_min_shown_branch_length{
-my $self = shift;
-$self->{min_shown_branch_length} = shift;
+sub set_min_shown_branch_length {
+ my $self = shift;
+ $self->{min_shown_branch_length} = shift;
}
-sub get_min_shown_branch_length{
-my $self = shift;
-return $self->{min_shown_branch_length};
+sub get_min_shown_branch_length {
+ my $self = shift;
+ return $self->{min_shown_branch_length};
}
-sub shown_branch_length_transformation_reset{
- my $self = shift;
- $self->set_shown_branch_length_transformation(shift);
- $self->{longest_branch_length} = undef;
- $self->get_root()->calculate_distances_from_root();
+sub shown_branch_length_transformation_reset {
+ my $self = shift;
+ $self->set_shown_branch_length_transformation(shift);
+ $self->{longest_branch_length} = undef;
+ $self->get_root()->calculate_distances_from_root();
}
-sub subtree_newick{
-my $self = shift;
-my $subtree_root = shift || $self->get_root();
-#print "number of children: ", scalar $subtree_root->get_children(), "\n";
-my $newick_string = $subtree_root->recursive_subtree_newick();
-return $newick_string;
+sub subtree_newick {
+ my $self = shift;
+ my $subtree_root = shift || $self->get_root();
+
+ #print "number of children: ", scalar $subtree_root->get_children(), "\n";
+ my $newick_string = $subtree_root->recursive_subtree_newick();
+ return $newick_string;
}
=head2 function generate_newick()
@@ -807,15 +906,14 @@ return $newick_string;
=cut
sub generate_newick {
- my $self = shift;
- my $node = shift;
+ my $self = shift;
+ my $node = shift;
my $show_root = shift;
$node ||= $self->get_root();
return $node->recursive_generate_newick('');
}
-
=head2 function get_orthologs()
Synopsis: my $ortho_trees_ref = $tree->get_orthologs();
@@ -833,53 +931,54 @@ sub generate_newick {
=cut
sub get_orthologs {
- my $self=shift;
- my $root_node = $self->get_root();
+ my $self = shift;
+ my $root_node = $self->get_root();
- $root_node->recursive_set_leaf_count(); # set leaf_count attribute for all nodes
- $root_node->recursive_set_leaf_species_count(); # set leaf_species_count attribute for all nodes
- my $trees_ref = $root_node->collect_orthologs();
+ $root_node->recursive_set_leaf_count(); # set leaf_count attribute for all nodes
+ $root_node->recursive_set_leaf_species_count(); # set leaf_species_count attribute for all nodes
+ my $trees_ref = $root_node->collect_orthologs();
- # can delete the leaf_count and leaf_species_count attributes here
- my @node_list = $self->get_all_nodes();
- map($_->delete_attribute("leaf_count"), @node_list);
- map($_->delete_attribute("leaf_species_count"), @node_list);
-
- return $trees_ref;
+ # can delete the leaf_count and leaf_species_count attributes here
+ my @node_list = $self->get_all_nodes();
+ map( $_->delete_attribute("leaf_count"), @node_list );
+ map( $_->delete_attribute("leaf_species_count"), @node_list );
+
+ return $trees_ref;
}
#This should recursively get all the subtree leaf species counts, and then run over everything again,
-# comparing to the leaf counts for each species in the whole tree, to get the leaf species counts for the
+# comparing to the leaf counts for each species in the whole tree, to get the leaf species counts for the
# complement of each subtree.
-sub set_all_subtree_and_complement_leaf_species_counts{
- my $self = shift;
- my $leaf_species_count_hash = $self->get_root()->recursive_set_leaf_species_count();
- print "in set_all_subtree... ; number of species: ", scalar keys %$leaf_species_count_hash, "\n"; readline();
- $self->get_root()->recursive_set_leaf_species_count($leaf_species_count_hash);
-}
-
-sub get_complement_ortho_group_candidates{
- my $self = shift;
- my @node_list = $self->get_root()->recursive_subtree_node_list();
- foreach my $n (@node_list) {
- my $comp_leaf_count = $self->get_root()->get_attribute("leaf_count") - $n->get_attribute("leaf_count");
- my $comp_leaf_species_count = $n->get_attribute("comp_leaf_species_count");
- if ($comp_leaf_count == $comp_leaf_species_count && $comp_leaf_count >1) {
- print "complement to subtree : ", $n->get_name(), " is a og candidate \n";
- print "with $comp_leaf_count leaves and $comp_leaf_species_count leaf species \n";
- }
- }
+sub set_all_subtree_and_complement_leaf_species_counts {
+ my $self = shift;
+ my $leaf_species_count_hash = $self->get_root()->recursive_set_leaf_species_count();
+ print "in set_all_subtree... ; number of species: ", scalar keys %$leaf_species_count_hash, "\n";
+ readline();
+ $self->get_root()->recursive_set_leaf_species_count($leaf_species_count_hash);
+}
+
+sub get_complement_ortho_group_candidates {
+ my $self = shift;
+ my @node_list = $self->get_root()->recursive_subtree_node_list();
+ foreach my $n (@node_list) {
+ my $comp_leaf_count = $self->get_root()->get_attribute("leaf_count") - $n->get_attribute("leaf_count");
+ my $comp_leaf_species_count = $n->get_attribute("comp_leaf_species_count");
+ if ( $comp_leaf_count == $comp_leaf_species_count && $comp_leaf_count > 1 ) {
+ print "complement to subtree : ", $n->get_name(), " is a og candidate \n";
+ print "with $comp_leaf_count leaves and $comp_leaf_species_count leaf species \n";
+ }
+ }
}
-
-sub get_leaf_parents_list {
+sub get_leaf_parents_list {
my $self = shift;
- foreach my $leaf ($self->get_leaf_list()) {
- my $parent = $leaf->get_parent();
- ${$self->{leaf_parent_hash}}{$parent->get_node_key()}=$parent;
+ foreach my $leaf ( $self->get_leaf_list() ) {
+ my $parent = $leaf->get_parent();
+ ${ $self->{leaf_parent_hash} }{ $parent->get_node_key() } = $parent;
}
+
# return the parents as a neat list
- return map (${$self->{leaf_parent_hash}}{$_}, keys(%{$self->{leaf_parent_hash}}));
+ return map ( ${ $self->{leaf_parent_hash} }{$_}, keys( %{ $self->{leaf_parent_hash} } ) );
}
# helper functions that deal with the leaf list. It contains a list of nodes
@@ -897,10 +996,10 @@ sub get_leaf_parents_list {
=cut
-sub get_leaf_list {
- my $self=shift;
- if (!exists($self->{leaf_list}) || !@{$self->{leaf_list}}) { $self->calculate_leaf_list(); }
- return @{$self->{leaf_list}};
+sub get_leaf_list {
+ my $self = shift;
+ if ( !exists( $self->{leaf_list} ) || !@{ $self->{leaf_list} } ) { $self->calculate_leaf_list(); }
+ return @{ $self->{leaf_list} };
}
=head2 get_leaves
@@ -910,44 +1009,42 @@ sub get_leaf_list {
=cut
sub get_leaves {
- my $self = shift;
- return $self->get_leaf_list();
+ my $self = shift;
+ return $self->get_leaf_list();
}
-sub add_leaf_list {
- my $self = shift;
+sub add_leaf_list {
+ my $self = shift;
my $leaf_node = shift;
- push @{$self->{leaf_list}}, $leaf_node;
+ push @{ $self->{leaf_list} }, $leaf_node;
}
-sub clear_leaf_list {
+sub clear_leaf_list {
my $self = shift;
- @{$self->{leaf_list}}=();
+ @{ $self->{leaf_list} } = ();
}
-sub calculate_leaf_list {
+sub calculate_leaf_list {
my $self = shift;
$self->clear_leaf_list();
- my @leaf_list = $self->get_root()->recursive_leaf_list();
- foreach my $leaf (@leaf_list) {
- $self->add_leaf_list($leaf);
+ my @leaf_list = $self->get_root()->recursive_leaf_list();
+ foreach my $leaf (@leaf_list) {
+ $self->add_leaf_list($leaf);
}
}
-
-
-# the tree_topology_changed member variable contains the status of the
-# topology of the tree. If the tree has been changed, it should be 1,
-# otherwise it should be 0.
+# the tree_topology_changed member variable contains the status of the
+# topology of the tree. If the tree has been changed, it should be 1,
+# otherwise it should be 0.
#
-sub get_tree_topology_changed {
+sub get_tree_topology_changed {
my $self = shift;
return $self->{tree_topology_changed};
}
-sub _set_tree_topology_changed {
+sub _set_tree_topology_changed {
my $self = shift;
- $self->{tree_topology_changed}=shift;
+ $self->{tree_topology_changed} = shift;
}
=head2 function get_name()
@@ -960,8 +1057,8 @@ sub _set_tree_topology_changed {
=cut
-sub get_name {
- my $self=shift;
+sub get_name {
+ my $self = shift;
return $self->{name};
}
@@ -976,9 +1073,9 @@ sub get_name {
=cut
-sub set_name {
- my $self=shift;
- $self->{name}=shift;
+sub set_name {
+ my $self = shift;
+ $self->{name} = shift;
}
=head2 function get_longest_root_leaf_length()
@@ -991,13 +1088,14 @@ sub set_name {
=cut
-sub get_longest_root_leaf_length {
- my $self=shift;
- if (!$self->{longest_branch_length}) {
- $self->set_longest_root_leaf_length($self->calculate_longest_root_leaf_length());
- }
-# print "in get_longest_root_leaf_length: ", $self->{longest_branch_length}, "\n";
- return $self->{longest_branch_length};
+sub get_longest_root_leaf_length {
+ my $self = shift;
+ if ( !$self->{longest_branch_length} ) {
+ $self->set_longest_root_leaf_length( $self->calculate_longest_root_leaf_length() );
+ }
+
+ # print "in get_longest_root_leaf_length: ", $self->{longest_branch_length}, "\n";
+ return $self->{longest_branch_length};
}
=head2 function set_longest_root_leaf_length()
@@ -1012,21 +1110,21 @@ sub get_longest_root_leaf_length {
=cut
-sub set_longest_root_leaf_length {
- my $self=shift;
- $self->{longest_branch_length}=shift;
+sub set_longest_root_leaf_length {
+ my $self = shift;
+ $self->{longest_branch_length} = shift;
}
sub calculate_longest_root_leaf_length {
- my $self=shift;
- my $largest = 0;
- foreach my $leaf ($self->get_leaf_list()) {
- my $dist = $leaf->get_dist_from_root();
- if ($dist > $largest) {
- $largest=$dist;
- }
- }
- return $largest;
+ my $self = shift;
+ my $largest = 0;
+ foreach my $leaf ( $self->get_leaf_list() ) {
+ my $dist = $leaf->get_dist_from_root();
+ if ( $dist > $largest ) {
+ $largest = $dist;
+ }
+ }
+ return $largest;
}
=head2 function retrieve_longest_branch_node()
@@ -1039,15 +1137,12 @@ sub calculate_longest_root_leaf_length {
=cut
-sub retrieve_longest_branch_node {
- my $self=shift;
- my $longest_branch_node = $self->get_root()->_recursive_longest_branch_node(CXGN::Phylo::Node->new());
+sub retrieve_longest_branch_node {
+ my $self = shift;
+ my $longest_branch_node = $self->get_root()->_recursive_longest_branch_node( CXGN::Phylo::Node->new() );
return $longest_branch_node;
}
-
-
-
=head2 APPEARANCE OF THE TREE
=head2 function get_show_labels()
@@ -1060,8 +1155,8 @@ sub retrieve_longest_branch_node {
=cut
-sub get_show_labels {
- my $self=shift;
+sub get_show_labels {
+ my $self = shift;
return $self->{show_labels};
}
@@ -1076,22 +1171,32 @@ sub get_show_labels {
=cut
-sub set_show_labels {
- my $self=shift;
- $self->{show_labels}=shift;
+sub set_show_labels {
+ my $self = shift;
+ $self->{show_labels} = shift;
}
-
-sub get_show_species_in_label{
+sub get_show_species_in_label {
my $self = shift;
return $self->{show_species_in_labels};
}
-sub set_show_species_in_label{
+sub set_show_species_in_label {
my $self = shift;
$self->{show_species_in_labels} = shift;
}
+sub get_id_taxon_map{
+ my $self = shift;
+ return $self->{'id_taxon_map'};
+}
+
+sub set_id_taxon_map{
+ my $self = shift;
+ my $id_taxon_map = shift;
+ $self->{'id_taxon_map'} = $id_taxon_map;
+ return;
+}
=head2 accessors get_line_color(), set_line_color()
@@ -1104,17 +1209,16 @@ sub set_show_species_in_label{
=cut
-sub get_line_color {
- my $self=shift;
- return @{$self->{line_color}};
+sub get_line_color {
+ my $self = shift;
+ return @{ $self->{line_color} };
}
-sub set_line_color {
- my $self=shift;
- @{$self->{line_color}}=@_;
+sub set_line_color {
+ my $self = shift;
+ @{ $self->{line_color} } = @_;
}
-
=head2 accessors get_bgcolor(), set_bgcolor()
Synopsis: $tree->set_bgcolor(255, 255, 255);
@@ -1125,14 +1229,14 @@ sub set_line_color {
=cut
-sub get_bgcolor {
- my $self=shift;
- return @{$self->{bgcolor}};
+sub get_bgcolor {
+ my $self = shift;
+ return @{ $self->{bgcolor} };
}
-sub set_bgcolor {
- my $self=shift;
- @{$self->{bgcolor}}=@_;
+sub set_bgcolor {
+ my $self = shift;
+ @{ $self->{bgcolor} } = @_;
}
=head2 accessors get_hilite_color(), set_hilite_color()
@@ -1145,14 +1249,14 @@ sub set_bgcolor {
=cut
-sub get_hilite_color {
- my $self=shift;
- return @{$self->{hilite_color}};
+sub get_hilite_color {
+ my $self = shift;
+ return @{ $self->{hilite_color} };
}
-sub set_hilite_color {
- my $self=shift;
- @{$self->{hilite_color}}=@_;
+sub set_hilite_color {
+ my $self = shift;
+ @{ $self->{hilite_color} } = @_;
}
=head2 function get_node_by_name()
@@ -1169,48 +1273,48 @@ sub set_hilite_color {
=cut
-sub get_node_by_name {
+sub get_node_by_name {
my $self = shift;
my $name = shift;
- foreach my $n ($self->get_all_nodes()){
- return $n if ($n->get_name() eq $name);
- }
- return undef;
+ foreach my $n ( $self->get_all_nodes() ) {
+ return $n if ( $n->get_name() eq $name );
+ }
+ return undef;
}
#returns a list of nodes matching a certain reg expression depending on the argument
-sub search_node_name {
- my $self = shift;
- my $term = shift;
+sub search_node_name {
+ my $self = shift;
+ my $term = shift;
my @nodes = ();
- foreach my $n ($self->get_all_nodes()) {
- my $node_name = $n->get_name();
- if ($node_name =~ /\Q$term\E/i) {
- push @nodes, $n;
- }
+ foreach my $n ( $self->get_all_nodes() ) {
+ my $node_name = $n->get_name();
+ if ( $node_name =~ /\Q$term\E/i ) {
+ push @nodes, $n;
+ }
}
return @nodes;
}
#returns a list of nodes matching a certain reg expression depending on the argument
-sub search_label_name {
- my $self = shift;
- my $term = shift;
- my @nodes = ();
- foreach my $n ($self->get_all_nodes()) {
- my $label_name = $n->get_label()->get_name();
- if ($term =~ m/m\/(.*)\//) { # if enter m/stuff/ then treat stuff as perl regex
- my $match = $1;
- if ($match && $label_name =~ /$match/) {
- push @nodes, $n;
- }
- } else {
- if ($term && $label_name =~ /\Q$term\E/i) {
- push @nodes, $n;
- }
- }
- }
- return @nodes;
+sub search_label_name {
+ my $self = shift;
+ my $term = shift;
+ my @nodes = ();
+ foreach my $n ( $self->get_all_nodes() ) {
+ my $label_name = $n->get_label()->get_name();
+ if ( $term =~ m/m\/(.*)\// ) { # if enter m/stuff/ then treat stuff as perl regex
+ my $match = $1;
+ if ( $match && $label_name =~ /$match/ ) {
+ push @nodes, $n;
+ }
+ } else {
+ if ( $term && $label_name =~ /\Q$term\E/i ) {
+ push @nodes, $n;
+ }
+ }
+ }
+ return @nodes;
}
=head2 function compare()
@@ -1261,12 +1365,13 @@ Author: Tom York
=cut
-sub compare_rooted{
- my $self = shift;
- my $other_tree = shift;
-my $compare_field = shift;
-# print STDOUT "in compare_rooted. compare_field: $compare_field \n";
- return $self->get_root()->compare_subtrees($other_tree->get_root(), $compare_field);
+sub compare_rooted {
+ my $self = shift;
+ my $other_tree = shift;
+ my $compare_field = shift;
+
+ # print STDOUT "in compare_rooted. compare_field: $compare_field \n";
+ return $self->get_root()->compare_subtrees( $other_tree->get_root(), $compare_field );
}
=head2 function compare_unrooted
@@ -1286,35 +1391,35 @@ my $compare_field = shift;
=cut
-sub compare_unrooted {
- my $self = shift;
- my $other_tree = shift;
- my $compare_field = shift; # to control comparison of names (default) or species ("species")
- # copy the trees into temporary trees, so that the trees can
- # be manipulated (rerooted, collapsed) without changing the original trees.
- #
- # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
- my $tree1 = $self->copy();
- my $tree2 = $other_tree->copy();
-
- # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
-
- my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
- my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name());
-
- if (!$corresponding_leaf) {
- print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n");
- return 0;
- }
+sub compare_unrooted {
+ my $self = shift;
+ my $other_tree = shift;
+ my $compare_field = shift; # to control comparison of names (default) or species ("species")
+ # copy the trees into temporary trees, so that the trees can
+ # be manipulated (rerooted, collapsed) without changing the original trees.
+ #
+ # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
+ my $tree1 = $self->copy();
+ my $tree2 = $other_tree->copy();
+
+ # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
+
+ my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
+ my $corresponding_leaf = $tree2->get_node_by_name( $leaf1->get_name() );
+
+ if ( !$corresponding_leaf ) {
+ print( "in compare_unrooted. leaf1 name: ",
+ $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n" );
+ return 0;
+ }
- # reset roots of trees to the two corresponding leaves:
- $tree1->reset_root($leaf1);
- $tree2->reset_root($corresponding_leaf);
+ # reset roots of trees to the two corresponding leaves:
+ $tree1->reset_root($leaf1);
+ $tree2->reset_root($corresponding_leaf);
- return $tree1->get_root()->compare_subtrees($tree2->get_root(), $compare_field);
+ return $tree1->get_root()->compare_subtrees( $tree2->get_root(), $compare_field );
}
-
=head2 function get_layout(), set_layout()
Synopsis: $tree->set_layout($layout)
@@ -1326,17 +1431,16 @@ sub compare_unrooted {
=cut
-sub get_layout {
- my $self=shift;
+sub get_layout {
+ my $self = shift;
return $self->{layout};
}
-sub set_layout {
- my $self=shift;
- $self->{layout}=shift;
+sub set_layout {
+ my $self = shift;
+ $self->{layout} = shift;
}
-
=head2 function layout()
Synopsis: $tree->layout()
@@ -1348,13 +1452,11 @@ sub set_layout {
=cut
-
-sub layout {
+sub layout {
my $self = shift;
$self->get_layout()->layout();
}
-
=head2 accessors get_renderer(), set_renderer()
Synopsis: $tree->set_renderer($renderer)
@@ -1365,14 +1467,14 @@ sub layout {
=cut
-sub get_renderer {
- my $self=shift;
+sub get_renderer {
+ my $self = shift;
return $self->{renderer};
}
-sub set_renderer {
- my $self=shift;
- $self->{renderer}=shift;
+sub set_renderer {
+ my $self = shift;
+ $self->{renderer} = shift;
}
=head2 function render()
@@ -1388,21 +1490,21 @@ sub set_renderer {
=cut
-sub render {
- my $self = shift;
- my $print_all_labels=shift;
+sub render {
+ my $self = shift;
+ my $print_all_labels = shift;
$self->get_renderer()->render($print_all_labels);
}
sub standard_layout {
- my $self = shift;
- my $layout = CXGN::Phylo::Layout->new($self);
- $layout->set_top_margin(20);
- $layout->set_bottom_margin(20);
- $layout->set_image_height(400);
- $layout->set_image_width(700);
- $self->set_layout($layout);
- $self->layout();
+ my $self = shift;
+ my $layout = CXGN::Phylo::Layout->new($self);
+ $layout->set_top_margin(20);
+ $layout->set_bottom_margin(20);
+ $layout->set_image_height(400);
+ $layout->set_image_width(700);
+ $self->set_layout($layout);
+ $self->layout();
}
=head2 function render_png()
@@ -1417,19 +1519,18 @@ sub standard_layout {
=cut
-sub render_png {
- my $self = shift;
- my $file = shift;
- my $print_all_labels= shift; ## Boolean for printing non-leaf node labels
+sub render_png {
+ my $self = shift;
+ my $file = shift;
+ my $print_all_labels = shift; ## Boolean for printing non-leaf node labels
$self->layout();
my $png_string = $self->render($print_all_labels);
- if(defined $file){
- open (my $T, ">$file") || die "PNG_tree_renderer: render_png(): Can't open file $file.";
- print $T $png_string;
- close $T ;
- }
- else {
- return $png_string;
+ if ( defined $file ) {
+ open( my $T, ">$file" ) || die "PNG_tree_renderer: render_png(): Can't open file $file.";
+ print $T $png_string;
+ close $T;
+ } else {
+ return $png_string;
}
}
@@ -1445,33 +1546,43 @@ sub render_png {
sub collapse_tree {
my $self = shift;
+
# first, collapse all nodes that have only one child onto the
# parent node
#
-#print STDERR "before rec..coll...single_nodes\n";
+ # print STDERR "Z0 BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n";
+
+ #print STDERR "before rec..coll...single_nodes\n";
+ # print STDERR "newick: ", $self->generate_newick(), "\n";
$self->get_root()->recursive_collapse_single_nodes();
-#print STDERR "after rec..coll...single_nodes\n";
+# print STDERR "Z BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n";
+
+ #print STDERR "after rec..coll...single_nodes\n";
$self->recalculate_tree_data();
+# print STDERR "ZZ BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n";
- # then, collapse all nodes that have branch lengths of zero
+ # then, collapse all nodes that have branch lengths of zero
# with their parent node
#
-#print STDERR "before rec..coll...zero_branches\n";
+ #print STDERR "before rec..coll...zero_branches\n";
$self->get_root()->recursive_collapse_zero_branches();
-#print STDERR "after rec..coll...zero_branches\n";
+
+ #print STDERR "after rec..coll...zero_branches\n";
+# print STDERR "ZZZ BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n";
# let's re-calculate the tree's properties
#
$self->recalculate_tree_data();
}
-sub collapse_unique_species_subtrees {
+sub collapse_unique_species_subtrees {
my $self = shift;
- # calculate, for each node, how many nodes are beneath it.
+
+ # calculate, for each node, how many nodes are beneath it.
# This information can then be accessed using the
# $node-> get_subtree_node_count() function.
#
- # $self->get_root()->calculate_subtree_node_count();
+ # $self->get_root()->calculate_subtree_node_count();
# calculate, for each node, how many different species are in the leaves of the subtree beneath it.
#
@@ -1495,12 +1606,12 @@ sub collapse_unique_species_subtrees {
=cut
-sub find_point_furthest_from_leaves{
- my $self = shift;
- $self->set_min_dist_to_leaf();
- my @furthest_point = $self->get_root()->recursive_find_point_furthest_from_leaves();
- $furthest_point[1] -= $furthest_point[0]->get_attribute("min_dist_to_leaf");
- return @furthest_point;
+sub find_point_furthest_from_leaves {
+ my $self = shift;
+ $self->set_min_dist_to_leaf();
+ my @furthest_point = $self->get_root()->recursive_find_point_furthest_from_leaves();
+ $furthest_point[1] -= $furthest_point[0]->get_attribute("min_dist_to_leaf");
+ return @furthest_point;
}
=head2 function find_point_closest_to_furthest_leaf()
@@ -1517,37 +1628,37 @@ sub find_point_furthest_from_leaves{
=cut
# returns a list containing a node object, and the distance of the point above that node
-sub find_point_closest_to_furthest_leaf{
- my $self = shift;
- $self->get_root()->recursive_set_max_dist_to_leaf_in_subtree();
-
- my @nodes = $self->get_root()->recursive_subtree_node_list();
- push @nodes, $self->get_root(); # we want the root in our list
-
- my @sorted_nodes = sort
- { $a->get_max_leaf_leaf_pathlength_in_subtree_thru_node()
- <=>
- $b->get_max_leaf_leaf_pathlength_in_subtree_thru_node() }
- @nodes;
-
- # using attribute "lptl_child" (longest path to leaf child) follow the longest path to leaf,
- # until you reach the midpoint of the longest leaf to leaf path
- my $current_node = pop @sorted_nodes;
- my $distance_to_go = 0.5*($current_node->get_attribute("dist_to_leaf_longest") - $current_node->get_attribute("dist_to_leaf_next_longest"));
- for (;;) {
- my $next_node = $current_node->get_attribute("lptl_child");
- my $branch_length = $next_node->get_branch_length();
- if ($branch_length >= $distance_to_go) {
- return ($next_node, $branch_length - $distance_to_go);
- } else {
- $distance_to_go -= $branch_length;
- $current_node = $next_node;
- }
- }
+sub find_point_closest_to_furthest_leaf {
+ my $self = shift;
+ $self->get_root()->recursive_set_max_dist_to_leaf_in_subtree();
+
+ my @nodes = $self->get_root()->recursive_subtree_node_list();
+ push @nodes, $self->get_root(); # we want the root in our list
+
+ my @sorted_nodes = sort {
+ $a->get_max_leaf_leaf_pathlength_in_subtree_thru_node()
+ <=> $b->get_max_leaf_leaf_pathlength_in_subtree_thru_node()
+ } @nodes;
+
+ # using attribute "lptl_child" (longest path to leaf child) follow the longest path to leaf,
+ # until you reach the midpoint of the longest leaf to leaf path
+ my $current_node = pop @sorted_nodes;
+ my $distance_to_go =
+ 0.5 *
+ ( $current_node->get_attribute("dist_to_leaf_longest") -
+ $current_node->get_attribute("dist_to_leaf_next_longest") );
+ for ( ; ; ) {
+ my $next_node = $current_node->get_attribute("lptl_child");
+ my $branch_length = $next_node->get_branch_length();
+ if ( $branch_length >= $distance_to_go ) {
+ return ( $next_node, $branch_length - $distance_to_go );
+ } else {
+ $distance_to_go -= $branch_length;
+ $current_node = $next_node;
+ }
+ }
}
-
-
=head2 function reset_root_to_point_on_branch()
Synopsis: $t->reset_root_to_point_on_branch($anode, $distance)
@@ -1561,16 +1672,14 @@ sub find_point_closest_to_furthest_leaf{
Author: Tom York.
=cut
-sub reset_root_to_point_on_branch{
- my $self = shift;
- my ($child_of_new_node, $dist_above) = @_;
-
- my $new_node = $child_of_new_node->add_parent($dist_above); # goes
- my $former_root = $self->get_root();
- $self->reset_root($new_node);
-
- $self->collapse_tree();
+sub reset_root_to_point_on_branch {
+ my $self = shift;
+ my ( $node, $dist_above ) = @_;
+ return if ( $node->is_root() ); # if desired root node is already the root do nothing.
+ my $new_node = $node->add_parent($dist_above); # goes
+ $self->reset_root($new_node);
+ $self->collapse_tree();
}
=head2 function set_min_dist_to_leaf()
@@ -1586,13 +1695,12 @@ sub reset_root_to_point_on_branch{
=cut
-sub set_min_dist_to_leaf{
- my $self = shift;
- $self->get_root()->recursive_set_min_dist_to_leaf();
- $self->get_root()->recursive_propagate_mdtl();
+sub set_min_dist_to_leaf {
+ my $self = shift;
+ $self->get_root()->recursive_set_min_dist_to_leaf();
+ $self->get_root()->recursive_propagate_mdtl();
}
-
=head2 function min_leaf_dist_variance_point()
Synopsis: $t->min_leaf_dist_variance_point()
@@ -1609,29 +1717,28 @@ sub set_min_dist_to_leaf{
=cut
-sub min_leaf_dist_variance_point{
- my $self = shift;
+sub min_leaf_dist_variance_point {
+ my $self = shift;
- $self->get_root()->recursive_set_dl_dlsqr_sums_down();
- $self->get_root()->recursive_set_dl_dlsqr_sums_up();
+ $self->get_root()->recursive_set_dl_dlsqr_sums_down();
+ $self->get_root()->recursive_set_dl_dlsqr_sums_up();
- my @node_list = $self->get_root()->recursive_subtree_node_list();
- my $opt_node = shift @node_list;
- my ($opt_dist_above, $opt_var) = $opt_node->min_leaf_dist_variance_point();
+ my @node_list = $self->get_root()->recursive_subtree_node_list();
+ my $opt_node = shift @node_list;
+ my ( $opt_dist_above, $opt_var ) = $opt_node->min_leaf_dist_variance_point();
- foreach my $n (@node_list) {
- my ($da, $var) = $n->min_leaf_dist_variance_point();
- if ($var < $opt_var) {
- $opt_node = $n;
- $opt_dist_above = $da;
- $opt_var = $var;
- }
- }
- $self->get_root()->recursive_delete_dl_dlsqr_attributes();
- return ($opt_node, $opt_dist_above, $opt_var);
+ foreach my $n (@node_list) {
+ my ( $da, $var ) = $n->min_leaf_dist_variance_point();
+ if ( $var < $opt_var ) {
+ $opt_node = $n;
+ $opt_dist_above = $da;
+ $opt_var = $var;
+ }
+ }
+ $self->get_root()->recursive_delete_dl_dlsqr_attributes();
+ return ( $opt_node, $opt_dist_above, $opt_var );
}
-
=head2 function test_tree_node_hash()
Synopsis: $t->test_tree_node_hash()
@@ -1656,34 +1763,35 @@ sub min_leaf_dist_variance_point{
=cut
-sub test_tree_node_hash{
- my $ok1 = 1; my $ok2 = 1; my $ok3 = 1;
- my $self = shift;
- my $node_hashref = $self->{node_hash};
- my $root = $self->get_root();
- my @node_list = $root->recursive_subtree_node_list();
- push @node_list, $root;
- my %nodekeys;
-
- foreach my $n (@node_list) { # test that each node in this list is found in the tree's node hash
- my $node_key = $n->get_node_key();
- $nodekeys{$node_key}++;
- if (!defined $node_hashref->{$node_key}) { # a node in node_list is not in the hash.
- $ok1 = 0;
- }
- }
-
- if (scalar keys %nodekeys != scalar @node_list) { # test that each node in node_list has a distinct key
- $ok2 = 0;
- }
-
- my @node_keys = keys (%$node_hashref); # test that each key in node hash is
- if (scalar @node_keys != scalar @node_list) {
- $ok3 = 0;
- }
- return $ok1*$ok2*$ok3;
-}
+sub test_tree_node_hash {
+ my $ok1 = 1;
+ my $ok2 = 1;
+ my $ok3 = 1;
+ my $self = shift;
+ my $node_hashref = $self->{node_hash};
+ my $root = $self->get_root();
+ my @node_list = $root->recursive_subtree_node_list();
+ push @node_list, $root;
+ my %nodekeys;
+
+ foreach my $n (@node_list) { # test that each node in this list is found in the tree's node hash
+ my $node_key = $n->get_node_key();
+ $nodekeys{$node_key}++;
+ if ( !defined $node_hashref->{$node_key} ) { # a node in node_list is not in the hash.
+ $ok1 = 0;
+ }
+ }
+ if ( scalar keys %nodekeys != scalar @node_list ) { # test that each node in node_list has a distinct key
+ $ok2 = 0;
+ }
+
+ my @node_keys = keys(%$node_hashref); # test that each key in node hash is
+ if ( scalar @node_keys != scalar @node_list ) {
+ $ok3 = 0;
+ }
+ return $ok1 * $ok2 * $ok3;
+}
=head2 function test_tree_parents_and_children()
@@ -1697,67 +1805,70 @@ sub test_tree_node_hash{
=cut
-sub test_tree_parents_and_children{
- my $self = shift;
- my $ok1 = $self->test_tree_nodes_are_parents_of_their_children();
- my $ok2 = $self->test_tree_nodes_are_children_of_their_parents();
- return ($ok1 && $ok2);
+sub test_tree_parents_and_children {
+ my $self = shift;
+ my $ok1 = $self->test_tree_nodes_are_parents_of_their_children();
+ my $ok2 = $self->test_tree_nodes_are_children_of_their_parents();
+ return ( $ok1 && $ok2 );
}
-sub test_tree{
- my $self = shift;
- return $self->test_tree_node_hash() && $self->test_tree_parents_and_children();
+sub test_tree {
+ my $self = shift;
+ return $self->test_tree_node_hash() && $self->test_tree_parents_and_children();
}
# tests that for all nodes n, each child of n has n as its parent.
-sub test_tree_nodes_are_parents_of_their_children{
- my $self = shift;
- my $root = $self->get_root();
- my @node_list = $root->recursive_subtree_node_list();
- push @node_list, $root;
- my $ok = 1;
-
- foreach my $n (@node_list) {
- my @children = $n->get_children();
- my $node_key = $n->get_node_key();
- foreach my $c (@children) {
- if(! defined $c->get_parent()){
- print("child node has undefined parent. \n"); $n->print_node(); $c->print_node();
- $ok = 0;
- } elsif ($c->get_parent()->get_node_key() != $node_key) {
- print("child node has wrong parent. \n"); $n->print_node(); $c->print_node();
- $ok = 0;
- }
- }
- }
- return $ok;
+sub test_tree_nodes_are_parents_of_their_children {
+ my $self = shift;
+ my $root = $self->get_root();
+ my @node_list = $root->recursive_subtree_node_list();
+ push @node_list, $root;
+ my $ok = 1;
+
+ foreach my $n (@node_list) {
+ my @children = $n->get_children();
+ my $node_key = $n->get_node_key();
+ foreach my $c (@children) {
+ if ( !defined $c->get_parent() ) {
+ print("child node has undefined parent. \n");
+ $n->print_node();
+ $c->print_node();
+ $ok = 0;
+ } elsif ( $c->get_parent()->get_node_key() != $node_key ) {
+ print("child node has wrong parent. \n");
+ $n->print_node();
+ $c->print_node();
+ $ok = 0;
+ }
+ }
+ }
+ return $ok;
}
-
# tests that for all nodes n, that if n has a parent, then n is among the children of that parent.
-sub test_tree_nodes_are_children_of_their_parents{
- my $self = shift;
- my $root = $self->get_root();
- my @node_list = $root->recursive_subtree_node_list();
- push @node_list, $root;
- my $ok = 1;
-
- foreach my $n (@node_list) { # test that $n is among the children of its parent
- my $p = $n->get_parent();
- if (defined $p) { # if not defined, do no test for this node
- my @children = $p->get_children();
- my $this_n_ok = 0;
- foreach my $c (@children) {
- if ($c->get_node_key() == $n->get_node_key()) {
- $this_n_ok = 1;
- last;
- }
- }
- if(! $this_n_ok){ print("This node not among the children of its parent: \n"), $n->print_node(); }
- $ok &&= $this_n_ok;
- }
- }
- return $ok;
+sub test_tree_nodes_are_children_of_their_parents {
+ my $self = shift;
+ my $root = $self->get_root();
+ my @node_list = $root->recursive_subtree_node_list();
+ push @node_list, $root;
+ my $ok = 1;
+
+ foreach my $n (@node_list) { # test that $n is among the children of its parent
+ my $p = $n->get_parent();
+ if ( defined $p ) { # if not defined, do no test for this node
+ my @children = $p->get_children();
+ my $this_n_ok = 0;
+ foreach my $c (@children) {
+ if ( $c->get_node_key() == $n->get_node_key() ) {
+ $this_n_ok = 1;
+ last;
+ }
+ }
+ if ( !$this_n_ok ) { print("This node not among the children of its parent: \n"), $n->print_node(); }
+ $ok &&= $this_n_ok;
+ }
+ }
+ return $ok;
}
=head2 function orthologs()
@@ -1780,31 +1891,32 @@ sub test_tree_nodes_are_children_of_their_parents{
=cut
-sub orthologs{
- my $self = shift; # tree
- my $species_t = shift; # species tree; if undefined,
- my $cssst = shift; # switch to collapse single-species subtrees to a single node
- my $qRFd_max = shift;
- if (!defined $qRFd_max) {
- $qRFd_max = 0;
- }
- if ($cssst) {
- $self->collapse_unique_species_subtrees();
- }
- # should we collapse the tree here?
- my $ortho_trees_ref = $self->get_orthologs();
- my @ortho_groups=(); # a list of Ortholog_group object that will contain the results
-
- # go through all the ortho_trees and compare to the species tree
- #
- foreach my $ortho_t (@$ortho_trees_ref) {
- my $ortho_group = CXGN::Phylo::Ortholog_group->new($ortho_t, $species_t, $qRFd_max);
- if ($ortho_group->get_ortholog_tree()->get_leaf_count()>1) {
- push @ortho_groups, $ortho_group;
- }
- } # end of foreach my $ortho_t (@$ortho_trees_ref) {
- return @ortho_groups;
-} # end of sub orthologs
+sub orthologs {
+ my $self = shift; # tree
+ my $species_t = shift; # species tree; if undefined,
+ my $cssst = shift; # switch to collapse single-species subtrees to a single node
+ my $qRFd_max = shift;
+ if ( !defined $qRFd_max ) {
+ $qRFd_max = 0;
+ }
+ if ($cssst) {
+ $self->collapse_unique_species_subtrees();
+ }
+
+ # should we collapse the tree here?
+ my $ortho_trees_ref = $self->get_orthologs();
+ my @ortho_groups = (); # a list of Ortholog_group object that will contain the results
+
+ # go through all the ortho_trees and compare to the species tree
+ #
+ foreach my $ortho_t (@$ortho_trees_ref) {
+ my $ortho_group = CXGN::Phylo::Ortholog_group->new( $ortho_t, $species_t, $qRFd_max );
+ if ( $ortho_group->get_ortholog_tree()->get_leaf_count() > 1 ) {
+ push @ortho_groups, $ortho_group;
+ }
+ } # end of foreach my $ortho_t (@$ortho_trees_ref) {
+ return @ortho_groups;
+} # end of sub orthologs
=head2 function set_missing_species_from_names()
@@ -1822,14 +1934,15 @@ sub orthologs{
=cut
-sub set_missing_species_from_names{
- my $self = shift;
- foreach my $n ($self->get_leaf_list()) {
- # print("defined \$n->get_species():{", defined $n->get_species(), "} ,\$n->get_species():{", $n->get_species(), "}\n");
- if (!$n->get_species()) {
- $n->set_species($n->determine_species_from_name());
- }
- }
+sub set_missing_species_from_names {
+ my $self = shift;
+ foreach my $n ( $self->get_leaf_list() ) {
+
+# print("defined \$n->get_species():{", defined $n->get_species(), "} ,\$n->get_species():{", $n->get_species(), "}\n");
+ if ( !$n->get_species() ) {
+ $n->set_species( $n->determine_species_from_name() );
+ }
+ }
}
=head2 function impose_branch_length_minimum()
@@ -1845,38 +1958,37 @@ sub set_missing_species_from_names{
=cut
-sub impose_branch_length_minimum{
- my $self = shift;
- my $minimum_bl = shift;
- $minimum_bl ||= $self->get_min_branch_length();
- foreach my $n ($self->get_all_nodes()) {
- unless (defined $n->get_branch_length() and $n->get_branch_length() > $minimum_bl) {
- $n->set_branch_length($minimum_bl);
- }
- }
- $self->get_root()->set_branch_length(0.0); # leave this at 0
+sub impose_branch_length_minimum {
+ my $self = shift;
+ my $minimum_bl = shift;
+ $minimum_bl ||= $self->get_min_branch_length();
+ foreach my $n ( $self->get_all_nodes() ) {
+ unless ( defined $n->get_branch_length() and $n->get_branch_length() > $minimum_bl ) {
+ $n->set_branch_length($minimum_bl);
+ }
+ }
+ $self->get_root()->set_branch_length(0.0); # leave this at 0
}
-
-sub set_show_standard_species{
- my $self = shift;
- $self->{show_standard_species} = shift;
-}
-sub get_show_standard_species{
- my $self = shift;
- return $self->{show_standard_species};
+sub set_show_standard_species {
+ my $self = shift;
+ $self->{show_standard_species} = shift;
}
-sub set_species_standardizer{
- my $self = shift;
- $self->{species_standardizer} = shift;
+sub get_show_standard_species {
+ my $self = shift;
+ return $self->{show_standard_species};
}
-sub get_species_standardizer{
- my $self = shift;
- return $self->{species_standardizer};
+sub set_species_standardizer {
+ my $self = shift;
+ $self->{species_standardizer} = shift;
}
+sub get_species_standardizer {
+ my $self = shift;
+ return $self->{species_standardizer};
+}
=head2 function update_label_names()
@@ -1889,21 +2001,24 @@ sub get_species_standardizer{
=cut
-sub update_label_names{
- my $self = shift;
- my $show_spec = $self->get_show_species_in_label();
-foreach my $n ($self->get_all_nodes()) {
-# print "node: ", $n->get_name(), " impl names: ", join("\t", @{$n->get_implicit_names()}), "\n";
- my $n_leaves = scalar @{$n->get_implicit_names()};
- my $label_text = $n->get_name();
- # print STDERR "in update_label_names. $n_leaves, [", $n->get_name(), "][", $label_text, "] \n";
- if ($show_spec) {
- my $species_text = $n->get_shown_species();
- # print STDERR "species text: ", $n->get_shown_species(), " is leaf:[", $n->is_leaf(), "]\n";
- $label_text .= " [".$species_text."]" if(defined $species_text);
- }
- $n->get_label()->set_name($label_text);
- }
+sub update_label_names {
+ my $self = shift;
+ my $show_spec = $self->get_show_species_in_label();
+ foreach my $n ( $self->get_all_nodes() ) {
+
+ # print "node: ", $n->get_name(), " impl names: ", join("\t", @{$n->get_implicit_names()}), "\n";
+ # my $n_leaves = scalar @{$n->get_implicit_names()};
+ my $label_text = $n->get_name();
+
+ # print STDERR "in update_label_names. $n_leaves, [", $n->get_name(), "][", $label_text, "] \n";
+ if ($show_spec) {
+ my $species_text = $n->get_shown_species();
+
+ # print STDERR "species text: ", $n->get_shown_species(), " is leaf:[", $n->is_leaf(), "]\n";
+ $label_text .= " [" . $species_text . "]" if ( defined $species_text );
+ }
+ $n->get_label()->set_name($label_text);
+ }
}
=head2 function prune_nameless_leaves()
@@ -1916,56 +2031,58 @@ foreach my $n ($self->get_all_nodes()) {
=cut
-sub prune_nameless_leaves{
-
- my $self = shift;
- my @leaf_list = $self->get_root()->recursive_leaf_list();
- my $count_leaves_deleted = 0;
- $self->get_root()->recursive_implicit_names(); # is this needed?
- foreach my $l (@leaf_list) {
- if ($l->get_name()) { # non-empty string. OK.
- } else {
- # print STDERR "Warning. Leaf node with key: ", $l->get_node_key(), " has empty or undefined name. Deleting nameless node. \n";
- $self->del_node($l);
- $self->collapse_tree();
- $count_leaves_deleted++;
- }
- }
- return $count_leaves_deleted;
+sub prune_nameless_leaves {
+
+ my $self = shift;
+ my @leaf_list = $self->get_root()->recursive_leaf_list();
+ my $count_leaves_deleted = 0;
+ $self->get_root()->recursive_implicit_names(); # is this needed?
+ foreach my $l (@leaf_list) {
+ if ( $l->get_name() ) { # non-empty string. OK.
+ } else {
+
+# print STDERR "Warning. Leaf node with key: ", $l->get_node_key(), " has empty or undefined name. Deleting nameless node. \n";
+ $self->del_node($l);
+ $self->collapse_tree();
+ $count_leaves_deleted++;
+ }
+ }
+ return $count_leaves_deleted;
}
# return key, node pair corresponding to the implicit name given as argument.
-sub node_from_implicit_name_string{
- #searches tree until the node with the specified implicit name string (tab separated) is found
- my $self = shift;
- my $in_string = shift;
- if (! scalar $self->get_root()->get_implicit_names() > 0) {
- $self->get_root()->recursive_implicit_names();
- }
+sub node_from_implicit_name_string {
- foreach my $k ($self->get_all_node_keys()) {
- my $n = $self->get_node($k);
- my $node_impl_name = join("\t", @{$n->get_implicit_names()});
- # print STDERR "node impl name: $node_impl_name \n";
- if ($node_impl_name eq $in_string) {
- return ($k, $n);
- }
- }
+ #searches tree until the node with the specified implicit name string (tab separated) is found
+ my $self = shift;
+ my $in_string = shift;
+ if ( !scalar $self->get_root()->get_implicit_names() > 0 ) {
+ $self->get_root()->recursive_implicit_names();
+ }
-# print STDERR "In Tree::node_from_implicit_name_string. Node not found which matches specified string: $in_string \n";
-# print STDERR $self->generate_newick(), "\n";
- return (undef, undef);
-}
+ foreach my $k ( $self->get_all_node_keys() ) {
+ my $n = $self->get_node($k);
+ my $node_impl_name = join( "\t", @{ $n->get_implicit_names() } );
+ # print STDERR "node impl name: $node_impl_name \n";
+ if ( $node_impl_name eq $in_string ) {
+ return ( $k, $n );
+ }
+ }
-sub leaf_species_string{
- my $self = shift;
-my $str = "species, standard species \n";
- foreach my $l ($self->get_leaf_list()) {
- my $std_species = ($l->get_standard_species())? $l->get_standard_species(): 'std_species_undefined';
- $str .= $l->get_species() . " " . $std_species . "\n";
- }
- return $str;
+ # print STDERR "In Tree::node_from_implicit_name_string. Node not found which matches specified string: $in_string \n";
+ # print STDERR $self->generate_newick(), "\n";
+ return ( undef, undef );
+}
+
+sub leaf_species_string {
+ my $self = shift;
+ my $str = "species, standard species \n";
+ foreach my $l ( $self->get_leaf_list() ) {
+ my $std_species = ( $l->get_standard_species() ) ? $l->get_standard_species() : 'std_species_undefined';
+ $str .= $l->get_species() . " " . $std_species . "\n";
+ }
+ return $str;
}
=head2 function quasiRF_distance
@@ -2000,95 +2117,102 @@ my $str = "species, standard species \n";
=cut
-sub quasiRF_distance{
-my $self = shift;
-my $tree1 = $self;
-my $tree2 = shift;
-my $compare_field = shift;
-
- my $root1 = $tree1->get_root();
- my $root2 = $tree2->get_root();
-
- my $distance = 0.0;
-
- # get the implicit names or species for each node in both trees
- #
- if (lc $compare_field eq "species") {
-#print STDOUT "top of quasiRF... compare_field eq species branch. \n";
- $root1->recursive_implicit_species();
- $root2->recursive_implicit_species();
-
- my %n_bl_2 = (); # set up the hash for tree2 nodes, with species as key (value unused)
- my $nhr2 = $tree2->{node_hash};
- foreach my $n2 (values ( %$nhr2)) {
- my $implicit_species = join("\t", @{$n2->get_implicit_species()});
- # print STDOUT "Y stree implicit species: $implicit_species
\n";
- $n_bl_2{$implicit_species}++; # values are not used, just count occurrences
- }
-
- my $nhr1 = $tree1->{node_hash};
- foreach my $n1 (values ( %$nhr1)) {
- my $implicit_species = join("\t", @{$n1->get_implicit_species()});
- # print STDOUT "otree implicit species: $implicit_species
\n";
- if (exists $n_bl_2{$implicit_species}) { # there are subtrees with this set of leaves in both trees, do nothing
- $n1->set_attribute("subtree_leaves_match", "true");
- # print STDOUT "true
\n";
- } else { # no node with this implicit name in tree2, so add branch length to total
- $distance += $n1->get_branch_length();
- $n1->set_attribute("subtree_leaves_match", "false");
- }
- }
- } else {
- $root1->recursive_implicit_names();
- $root2->recursive_implicit_names();
-
- # set up the hash for tree2 nodes, with name as key (value unused)
- my %n_bl_2 = ();
- my $nhr2 = $tree2->{node_hash};
- foreach my $n2 (values ( %$nhr2)) {
- $n_bl_2{$n2->get_name()}++; # values are not used, just count occurrences of the name
- }
-
- my $nhr1 = $tree1->{node_hash};
- foreach my $n1 (values ( %$nhr1)) {
- if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees, do nothing
- $n1->set_attribute("subtree_leaves_match", "true");
- } else { # no node with this implicit name in tree2, so add branch length to total
- $distance += $n1->get_branch_length();
- $n1->set_attribute("subtree_leaves_match", "false");
- }
- }
- }
-my $distance2 = $root1->recursive_quasiRF_distance(); # this works on tree1 - which is not a copy here.
-return $distance; # $tree1 has qRFd info at every node.
-}
-
-sub RF_distance {
- my $self = shift;
- my $other_tree = shift;
- my $compare_field = shift; # to control comparison of names (default) or species ("species")
- # copy the trees into temporary trees, so that the trees can
- # be manipulated (rerooted, collapsed) without changing the original trees.
- #
- # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
- my $tree1 = $self->copy();
- my $tree2 = $other_tree->copy();
+sub quasiRF_distance {
+ my $self = shift;
+ my $tree1 = $self;
+ my $tree2 = shift;
+ my $compare_field = shift;
- # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
+ my $root1 = $tree1->get_root();
+ my $root2 = $tree2->get_root();
- my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
- my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name());
+ my $distance = 0.0;
- if (!$corresponding_leaf) {
- print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n");
- return 0;
- }
+ # get the implicit names or species for each node in both trees
+ #
+ if ( lc $compare_field eq "species" ) {
+
+ #print STDOUT "top of quasiRF... compare_field eq species branch. \n";
+ $root1->recursive_implicit_species();
+ $root2->recursive_implicit_species();
+
+ my %n_bl_2 = (); # set up the hash for tree2 nodes, with species as key (value unused)
+ my $nhr2 = $tree2->{node_hash};
+ foreach my $n2 ( values(%$nhr2) ) {
+ my $implicit_species = join( "\t", @{ $n2->get_implicit_species() } );
+
+ # print STDOUT "Y stree implicit species: $implicit_species
\n";
+ $n_bl_2{$implicit_species}++; # values are not used, just count occurrences
+ }
+
+ my $nhr1 = $tree1->{node_hash};
+ foreach my $n1 ( values(%$nhr1) ) {
+ my $implicit_species = join( "\t", @{ $n1->get_implicit_species() } );
+
+ # print STDOUT "otree implicit species: $implicit_species
\n";
+ if ( exists $n_bl_2{$implicit_species} )
+ { # there are subtrees with this set of leaves in both trees, do nothing
+ $n1->set_attribute( "subtree_leaves_match", "true" );
+
+ # print STDOUT "true
\n";
+ } else { # no node with this implicit name in tree2, so add branch length to total
+ $distance += $n1->get_branch_length();
+ $n1->set_attribute( "subtree_leaves_match", "false" );
+ }
+ }
+ } else {
+ $root1->recursive_implicit_names();
+ $root2->recursive_implicit_names();
+
+ # set up the hash for tree2 nodes, with name as key (value unused)
+ my %n_bl_2 = ();
+ my $nhr2 = $tree2->{node_hash};
+ foreach my $n2 ( values(%$nhr2) ) {
+ $n_bl_2{ $n2->get_name() }++; # values are not used, just count occurrences of the name
+ }
+
+ my $nhr1 = $tree1->{node_hash};
+ foreach my $n1 ( values(%$nhr1) ) {
+ if ( exists $n_bl_2{ $n1->get_name() } )
+ { # there are subtrees with this set of leaves in both trees, do nothing
+ $n1->set_attribute( "subtree_leaves_match", "true" );
+ } else { # no node with this implicit name in tree2, so add branch length to total
+ $distance += $n1->get_branch_length();
+ $n1->set_attribute( "subtree_leaves_match", "false" );
+ }
+ }
+ }
+ my $distance2 = $root1->recursive_quasiRF_distance(); # this works on tree1 - which is not a copy here.
+ return $distance; # $tree1 has qRFd info at every node.
+}
+
+sub RF_distance {
+ my $self = shift;
+ my $other_tree = shift;
+ my $compare_field = shift; # to control comparison of names (default) or species ("species")
+ # copy the trees into temporary trees, so that the trees can
+ # be manipulated (rerooted, collapsed) without changing the original trees.
+ #
+ # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
+ my $tree1 = $self->copy();
+ my $tree2 = $other_tree->copy();
+
+ # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
+
+ my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
+ my $corresponding_leaf = $tree2->get_node_by_name( $leaf1->get_name() );
+
+ if ( !$corresponding_leaf ) {
+ print( "in compare_unrooted. leaf1 name: ",
+ $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n" );
+ return 0;
+ }
- # reset roots of trees to the two corresponding leaves:
- $tree1->reset_root($leaf1);
- $tree2->reset_root($corresponding_leaf);
+ # reset roots of trees to the two corresponding leaves:
+ $tree1->reset_root($leaf1);
+ $tree2->reset_root($corresponding_leaf);
- return $tree1->RF_distance_inner($tree2, $compare_field);
+ return $tree1->RF_distance_inner( $tree2, $compare_field );
}
=head2 function RF_distance_inner
@@ -2098,9 +2222,8 @@ sub RF_distance {
Arguments: A tree object; and optionally a string specifying
whether to compare node name or species.
(Default is name)
- Returns: Compares tree1 and tree2. If they are topologically
- the same, 0 is returned. Otherwise returns a "distance"
- describing how different the two trees are.
+ Returns: Compares tree1 and tree2. Returns list of 3 measures of distance:
+ robinson-foulds distance, symmetric disance and
Side effects: Sets "subtree_leaves_match" field for each node
Description: Tree1, tree2 should be collapsed before calling this
function. For each node in tree1 add branch length to
@@ -2115,429 +2238,585 @@ sub RF_distance {
=cut
-sub RF_distance_inner{
- my $self = shift;
- my $tree1 = $self;
- my $tree2 = shift;
- my $compare_field = shift;
-
- my $root1 = $tree1->get_root();
- my $root2 = $tree2->get_root();
-
- my $sym_diff = 0; #symmetric difference, just one for each partition in only one tree
- my $distance = 0.0;
- my $in_both_sum = 0.0;
- my $in_one_only_sum = 0.0;
- my $branch_score = 0.0;
-
- # get the implicit names or species for each node in both trees
- #
- if (lc $compare_field eq "species") {
- # die "RF_distance with compare_field set to species is not implemented. \n";
- #print STDOUT "top of quasiRF... compare_field eq species branch. \n";
- $root1->recursive_implicit_species();
- $root2->recursive_implicit_species();
- unless(join("\t", $root1->get_implicit_species()) eq join("\t", $root1->get_implicit_species())){
- print STDERR "In RFdistance; trees do not have same set of leaves (by species).\n";
- return undef;
- }
- # set up the hash for tree nodes, with species as key, node obj as value
- my %n_bl_1 = ();
- my @nhr1 = $root1->recursive_subtree_node_list; #->{node_hash};
-
- foreach my $n1 (@nhr1) { #all tree1 nodes except root1
- $n_bl_1{$n1->get_species()} = $n1;
- }
- my %n_bl_2 = ();
- my @nhr2 = $root2->recursive_subtree_node_list; #$tree2->{node_hash};
- foreach my $n2 (@nhr2) { #all tree2 nodes except root2
- $n_bl_2{$n2->get_species()} = $n2;
- }
-
- # my $in_both_sum = 0.0;
- # my $in_one_only_sum = 0.0;
- foreach my $n1 (@nhr1) {
- if (exists $n_bl_2{$n1->get_species()}) { # there are subtrees with this set of leaves in both trees
- my $n2 = $n_bl_2{$n1->get_species()};
- $in_both_sum += abs($n1->get_branch_length() - $n2->get_branch_length());
- } else { # no node with this implicit species in tree2, so add branch length to total
- $in_one_only_sum += $n1->get_branch_length();
- $sym_diff++;
- }
- }
- # my $in_both_sum2 = 0.0;
- foreach my $n2 (@nhr2) {
- if (exists $n_bl_1{$n2->get_species()}) { # there are subtrees with this set of leaves in both trees
- # my $n1 = $n_bl_1{$n2->get_species()};
- # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
- } else { # no node with this implicit species in tree2, so add branch length to total
- $in_one_only_sum += $n2->get_branch_length();
- $sym_diff++;
- }
- }
- # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, "\n");
- # $distance = $in_both_sum + $in_one_only_sum;
- # print "distance: ", $distance, "\n";
-
-
- } else { # compare field is "name"
-# print "comparing trees by name fields \n";
- $root1->recursive_implicit_names();
- $root2->recursive_implicit_names();
- unless(join("\t", $root1->get_name()) eq join("\t", $root1->get_name())){
- print STDERR "In RFdistance; trees do not have same set of leaves (by name).\n";
- return undef;
- }
- # set up the hash for tree nodes, with name as key, node obj as value
- my %n_bl_1 = ();
- my @nhr1 = $root1->recursive_subtree_node_list(); #->{node_hash};
-
- foreach my $n1 (@nhr1) { #all tree1 nodes except root1
-# print "n1 name: ", $n1->get_name(), "\n";
- $n_bl_1{$n1->get_name()} = $n1;
- }
- my %n_bl_2 = ();
- my @nhr2 = $root2->recursive_subtree_node_list(); #$tree2->{node_hash};
- foreach my $n2 (@nhr2) { #all tree2 nodes except root2
-# print "n2 name: ", $n2->get_name(), "\n";
- $n_bl_2{$n2->get_name()} = $n2;
- }
-
- # my $in_both_sum = 0.0;
- # my $in_one_only_sum = 0.0;
- my $diff = 0.0;
- # foreach my $n1 (@nhr1) {
- foreach my $name1 (keys %n_bl_1){
- my $n1 = $n_bl_1{$name1};
- if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees
- my $n2 = $n_bl_2{$n1->get_name()};
- $diff = $n1->get_branch_length() - $n2->get_branch_length();
- $in_both_sum += abs($diff); # $n1->get_branch_length() - $n2->get_branch_length());
- # $branch_score += $diff*$diff;
- } else { # no node with this implicit name in tree2, so add branch length to total
- $diff = $n1->get_branch_length();
- $in_one_only_sum += $diff; # $n1->get_branch_length();
- # $branch_score += $diff*$diff;
- $sym_diff++;
-# print "name not present in hash 2: ", $n1->get_name(), "\n";
- }
- $branch_score += $diff*$diff;
- }
- # my $in_both_sum2 = 0.0;
- # foreach my $n2 (@nhr2) {
- foreach my $name2 (keys %n_bl_2){
- my $n2 = $n_bl_2{$name2};
- if (exists $n_bl_1{$n2->get_name()}) { # there are subtrees with this set of leaves in both trees
- # my $n1 = $n_bl_1{$n2->get_name()};
- # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
- } else { # no node with this implicit name in tree2, so add branch length to total
- $in_one_only_sum += $n2->get_branch_length();
- $sym_diff++;
- # print "name not present in hash 1: ", $n2->get_name(), "\n";
- }
- }
- }
-
- $distance = $in_both_sum + $in_one_only_sum;
+sub RF_distance_inner {
+ my $self = shift;
+ my $tree1 = $self;
+ my $tree2 = shift;
+ my $compare_field = shift;
+
+ my $root1 = $tree1->get_root();
+ my $root2 = $tree2->get_root();
+
+ my $sym_diff = 0; #symmetric difference, just one for each partition in only one tree
+ my $distance = 0.0;
+ my $in_both_sum = 0.0;
+ my $in_one_only_sum = 0.0;
+ my $branch_score = 0.0;
+
+ # get the implicit names or species for each node in both trees
+ #
+ if ( lc $compare_field eq "species" ) {
+
+ # die "RF_distance with compare_field set to species is not implemented. \n";
+ #print STDOUT "top of quasiRF... compare_field eq species branch. \n";
+ $root1->recursive_implicit_species();
+ $root2->recursive_implicit_species();
+ unless ( join( "\t", $root1->get_implicit_species() ) eq join( "\t", $root1->get_implicit_species() ) ) {
+ print STDERR "In RFdistance; trees do not have same set of leaves (by species).\n";
+ return undef;
+ }
+
+ # set up the hash for tree nodes, with species as key, node obj as value
+ my %n_bl_1 = ();
+ my @nhr1 = $root1->recursive_subtree_node_list; #->{node_hash};
+
+ foreach my $n1 (@nhr1) { #all tree1 nodes except root1
+ $n_bl_1{ $n1->get_species() } = $n1;
+ }
+ my %n_bl_2 = ();
+ my @nhr2 = $root2->recursive_subtree_node_list; #$tree2->{node_hash};
+ foreach my $n2 (@nhr2) { #all tree2 nodes except root2
+ $n_bl_2{ $n2->get_species() } = $n2;
+ }
+
+ # my $in_both_sum = 0.0;
+ # my $in_one_only_sum = 0.0;
+ foreach my $n1 (@nhr1) {
+ if ( exists $n_bl_2{ $n1->get_species() } ) { # there are subtrees with this set of leaves in both trees
+ my $n2 = $n_bl_2{ $n1->get_species() };
+ $in_both_sum += abs( $n1->get_branch_length() - $n2->get_branch_length() );
+ } else { # no node with this implicit species in tree2, so add branch length to total
+ $in_one_only_sum += $n1->get_branch_length();
+ $sym_diff++;
+ }
+ }
+
+ # my $in_both_sum2 = 0.0;
+ foreach my $n2 (@nhr2) {
+ if ( exists $n_bl_1{ $n2->get_species() } ) { # there are subtrees with this set of leaves in both trees
+ # my $n1 = $n_bl_1{$n2->get_species()};
+ # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
+ } else { # no node with this implicit species in tree2, so add branch length to total
+ $in_one_only_sum += $n2->get_branch_length();
+ $sym_diff++;
+ }
+ }
+
+ # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, "\n");
+ # $distance = $in_both_sum + $in_one_only_sum;
+ # print "distance: ", $distance, "\n";
+
+ } else { # compare field is "name"
+
+ # print "comparing trees by name fields \n";
+ $root1->recursive_implicit_names();
+ $root2->recursive_implicit_names();
+ unless ( join( "\t", $root1->get_name() ) eq join( "\t", $root1->get_name() ) ) {
+ print STDERR "In RFdistance; trees do not have same set of leaves (by name).\n";
+ return undef;
+ }
+
+ # set up the hash for tree nodes, with name as key, node obj as value
+ my %n_bl_1 = ();
+ my @nhr1 = $root1->recursive_subtree_node_list(); #->{node_hash};
+
+ foreach my $n1 (@nhr1) { #all tree1 nodes except root1
+
+ # print "n1 name: ", $n1->get_name(), "\n";
+ $n_bl_1{ $n1->get_name() } = $n1;
+ }
+ my %n_bl_2 = ();
+ my @nhr2 = $root2->recursive_subtree_node_list(); #$tree2->{node_hash};
+ foreach my $n2 (@nhr2) { #all tree2 nodes except root2
+
+ # print "n2 name: ", $n2->get_name(), "\n";
+ $n_bl_2{ $n2->get_name() } = $n2;
+ }
+
+ # my $in_both_sum = 0.0;
+ # my $in_one_only_sum = 0.0;
+ my $diff = 0.0;
+
+ # foreach my $n1 (@nhr1) {
+ foreach my $name1 ( keys %n_bl_1 ) {
+ my $n1 = $n_bl_1{$name1};
+ if ( exists $n_bl_2{ $n1->get_name() } ) { # there are subtrees with this set of leaves in both trees
+ my $n2 = $n_bl_2{ $n1->get_name() };
+ $diff = $n1->get_branch_length() - $n2->get_branch_length();
+ $in_both_sum += abs($diff); # $n1->get_branch_length() - $n2->get_branch_length());
+ # $branch_score += $diff*$diff;
+ } else { # no node with this implicit name in tree2, so add branch length to total
+ $diff = $n1->get_branch_length();
+ $in_one_only_sum += $diff; # $n1->get_branch_length();
+ # $branch_score += $diff*$diff;
+ $sym_diff++;
+
+ # print "name not present in hash 2: ", $n1->get_name(), "\n";
+ }
+ $branch_score += $diff * $diff;
+ }
+
+ # my $in_both_sum2 = 0.0;
+ # foreach my $n2 (@nhr2) {
+ foreach my $name2 ( keys %n_bl_2 ) {
+ my $n2 = $n_bl_2{$name2};
+ if ( exists $n_bl_1{ $n2->get_name() } ) { # there are subtrees with this set of leaves in both trees
+ # my $n1 = $n_bl_1{$n2->get_name()};
+ # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
+ } else { # no node with this implicit name in tree2, so add branch length to total
+ $in_one_only_sum += $n2->get_branch_length();
+ $sym_diff++;
+
+ # print "name not present in hash 1: ", $n2->get_name(), "\n";
+ }
+ }
+ }
+
+ $distance = $in_both_sum + $in_one_only_sum;
+
# print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, " RFdistance: ", $distance, "\n");
- return ($distance, $sym_diff, $branch_score);
+ return ( $distance, $sym_diff, $branch_score );
}
-sub get_branch_length_sum{
- my $self = shift;
- my @nodelist = $self->get_root()->recursive_subtree_node_list;
- my $bl_sum = 0.0;
- foreach (@nodelist) {
- $bl_sum += $_->get_branch_length();
- }
- return $bl_sum;
+sub get_branch_length_sum {
+ my $self = shift;
+ my @nodelist = $self->get_root()->recursive_subtree_node_list;
+ my $bl_sum = 0.0;
+ foreach (@nodelist) {
+ $bl_sum += $_->get_branch_length();
+ }
+ return $bl_sum;
}
-sub get_branch_length_sum_noterm{ # sum of all non-terminal branch lengths
- my $self = shift;
- my @nodelist = $self->get_root()->recursive_subtree_node_list;
- my $bl_sum = 0.0;
- foreach (@nodelist) {
- next if($->is_leaf());
- $bl_sum += $_->get_branch_length();
- }
- return $bl_sum;
+sub get_branch_length_sum_noterm { # sum of all non-terminal branch lengths
+ my $self = shift;
+ my @nodelist = $self->get_root()->recursive_subtree_node_list;
+ my $bl_sum = 0.0;
+ foreach (@nodelist) {
+ next if ( $- > is_leaf() );
+ $bl_sum += $_->get_branch_length();
+ }
+ return $bl_sum;
}
-sub multiply_branch_lengths_by{
- my $self = shift;
- my $factor = shift;
- my @nodelist = $self->get_root()->recursive_subtree_node_list;
- foreach (@nodelist) {
- $_->set_branch_length($_->get_branch_length()*$factor);
- }
+sub multiply_branch_lengths_by {
+ my $self = shift;
+ my $factor = shift;
+ my @nodelist = $self->get_root()->recursive_subtree_node_list;
+ foreach (@nodelist) {
+ $_->set_branch_length( $_->get_branch_length() * $factor );
+ }
}
#scale branch lengths s.t. their sum is #desired_bl_sum (1.0 by default)
# returns original bl sum
-sub normalize_branch_length_sum{
- my $self = shift;
- my $desired_bl_sum = shift;
- $desired_bl_sum ||= 1.0;
- my $bl_sum = $self->get_branch_length_sum();
- if ($bl_sum <= 0.0) {
- print STDERR "Can\'t normalize branch length sum, sum is $bl_sum; <= zero. \n";
- } else {
- $self->multiply_branch_lengths_by($desired_bl_sum/$bl_sum);
- }
- return $bl_sum;
-}
-
-sub RFdist_over_totbl{ # this is (weighted, i.e. using branch lengths) RF distance, normalized by sum of all
-# branch lengths in both trees, so it will lie in range [0,1]
- my $self = shift;
- my $tree1 = $self;
- my $tree2 = shift;
- my $compare_field = shift;
- my $normalize_bl_sums = shift;
- $normalize_bl_sums = 0 unless(defined $normalize_bl_sums);
-
- if ($normalize_bl_sums) {
- $tree1->normalize_branch_length_sum();
- $tree2->normalize_branch_length_sum();
- }
- my $bl_sum = $tree1->get_branch_length_sum() + $tree2->get_branch_length_sum();
- #print "bl_sum: $bl_sum \n";
- my ($rfd, $symdiff, $branch_score) = $tree1->RF_distance($tree2, $compare_field);
-# print "bl_sum: $bl_sum . rfd: $rfd \n";
- return $rfd/$bl_sum;
+sub normalize_branch_length_sum {
+ my $self = shift;
+ my $desired_bl_sum = shift;
+ $desired_bl_sum ||= 1.0;
+ my $bl_sum = $self->get_branch_length_sum();
+ if ( $bl_sum <= 0.0 ) {
+ print STDERR "Can\'t normalize branch length sum, sum is $bl_sum; <= zero. \n";
+ } else {
+ $self->multiply_branch_lengths_by( $desired_bl_sum / $bl_sum );
+ }
+ return $bl_sum;
+}
+
+sub RFdist_over_totbl { # this is (weighted, i.e. using branch lengths) RF distance, normalized by sum of all
+
+ # branch lengths in both trees, so it will lie in range [0,1]
+ my $self = shift;
+ my $tree1 = $self;
+ my $tree2 = shift;
+ my $compare_field = shift;
+ my $normalize_bl_sums = shift;
+ $normalize_bl_sums = 0 unless ( defined $normalize_bl_sums );
+
+ if ($normalize_bl_sums) {
+ $tree1->normalize_branch_length_sum();
+ $tree2->normalize_branch_length_sum();
+ }
+ my $bl_sum = $tree1->get_branch_length_sum() + $tree2->get_branch_length_sum();
+
+ #print "bl_sum: $bl_sum \n";
+ my ( $rfd, $symdiff, $branch_score ) = $tree1->RF_distance( $tree2, $compare_field );
+
+ # print "bl_sum: $bl_sum . rfd: $rfd \n";
+ return $rfd / $bl_sum;
}
# divide into trees no bigger than $max_leaves
-sub divide_into_small_trees{
- my $self = shift;
- my $max_leaves = shift;
- $max_leaves ||= 100;
- # print "in Tree::divide_into_small_trees. ", $self->get_root()->get_attribute("leaf_count"), "\n\n";
- my $small_trees_array = $self->get_root()->recursive_divide_subtree_into_small_trees($max_leaves);
- return $small_trees_array;
+sub divide_into_small_trees {
+ my $self = shift;
+ my $max_leaves = shift;
+ $max_leaves ||= 100;
+
+ # print "in Tree::divide_into_small_trees. ", $self->get_root()->get_attribute("leaf_count"), "\n\n";
+ my $small_trees_array = $self->get_root()->recursive_divide_subtree_into_small_trees($max_leaves);
+ return $small_trees_array;
}
# get list of subtrees containing ortholog group candidate subtrees
# (trees with > 1 leaf, and distinct species in all leaves)
# the argument allows one to specify to go up some number of parent
# nodes above the nodes with the ortholog group candidate subtrees.
-sub get_ortholog_group_candidate_subtrees{
- my $self = shift;
- my $desired_levels_above = shift;
- $desired_levels_above = 0 unless($desired_levels_above > 0);
-# print "tree. levels_above: ", $desired_levels_above, "\n";
- my $ortholog_group_candidate_subtrees_array = [];
- $self->get_root()->recursive_set_levels_above_distinct_species_subtree();
- $self->get_root()->recursive_find_ortholog_group_candidate_subtrees($ortholog_group_candidate_subtrees_array, $desired_levels_above);
- return $ortholog_group_candidate_subtrees_array;
+sub get_ortholog_group_candidate_subtrees {
+ my $self = shift;
+ my $desired_levels_above = shift;
+ $desired_levels_above = 0 unless ( $desired_levels_above > 0 );
+
+ # print "tree. levels_above: ", $desired_levels_above, "\n";
+ my $ortholog_group_candidate_subtrees_array = [];
+ $self->get_root()->recursive_set_levels_above_distinct_species_subtree();
+ $self->get_root()->recursive_find_ortholog_group_candidate_subtrees( $ortholog_group_candidate_subtrees_array,
+ $desired_levels_above );
+ return $ortholog_group_candidate_subtrees_array;
+}
+
+sub make_names_urec_ok {
+ my $self = shift;
+ my @nodes = $self->get_all_nodes();
+ foreach my $a_node (@nodes) {
+ my $name = $a_node->get_name();
+ if ( $name =~ /^[a-zA-Z]/ ) { # ok - leave as is.
+ } else { # add X_ in front to make urec happy
+ $a_node->set_name("X_$name");
+ }
+ }
}
-
-sub make_names_urec_ok{
- my $self = shift;
- my @nodes = $self->get_all_nodes();
- foreach my $a_node (@nodes){
- my $name = $a_node->get_name();
- if($name =~ /^[a-zA-Z]/){ # ok - leave as is.
- }else{ # add X_ in front to make urec happy
- $a_node->set_name("X_$name");
+sub leaf_species_bit_pattern_string {
+ my $self = shift;
+ my @leaves = $self->get_leaf_list();
+ my $str = '';
+ foreach (@leaves) {
+ $str .= $_->get_name() . " [" . $_->get_attribute("species_bit_pattern") . "]\n";
}
- }
+ return $str;
}
- # using urec, find the node s.t. rooting on its branch gives minimal duplications and losses
- # w.r.t. a species tree
-sub find_mindl_node{
- my $gene_tree = shift; # a rooted gene tree
- my $species_t = shift; # a species tree
-#print "in find_mindl_node. species_tree newick: \n", $species_t->subtree_newick(), " \n";
-
-# print STDERR "##################### Top of find_mindl_node. #############\n";
- # urec requires binary tree - make sure the tree is binary
- # if polytomy at root, reroot a bit down one branch, to get binary root (if was tritomy)
- my @new_root_point;
- {
- my @root_children = $gene_tree->get_root()->get_children();
- if (scalar @root_children != 2) {
- @new_root_point = ($root_children[0], 0.9*$root_children[0]->get_branch_length());
- $gene_tree->reset_root_to_point_on_branch(@new_root_point);
- }
- }
- # binarify every non-binary node. At present doesn't attempt to choose in a smart way
- # among the various possible resolutions
- $gene_tree->make_binary($gene_tree->get_root()); # urec requires binary tree.
-
- my $store_show_std_species = $gene_tree->get_show_standard_species();
- # put the trees into form of newick strings with no whitespace, so urec will be happy
- $gene_tree->show_newick_attribute("species");
- $gene_tree->set_show_standard_species(1);
- # make sure node names start with alphabetic char to make urec happy.
- $gene_tree->make_names_urec_ok();
- # need to redo node implicit names here.
- my $gene_newick_string = $gene_tree->generate_newick();
-# print "binarified gene tree (urec input): ", $gene_newick_string, "\n";
- $gene_newick_string =~ s/\s//g;
-
- $species_t->show_newick_attribute("species");
- $species_t->set_show_standard_species(1);
-my @thenodes = $species_t->get_leaves();
- foreach my $a_node (@thenodes){
-# print "species,stdspecies: ", $a_node->get_species(), " ", $a_node->get_standard_species(), "\n";
- }
-#print "species tree (from subtree_newick): ", $species_t->subtree_newick(), "\n";
- my $species_newick_string = $species_t->generate_newick();
- $species_newick_string =~ s/\s//g; # remove whitespace
-#print "species tree newick string: $species_newick_string\n";
-#print "gene newick string: $gene_newick_string \n\n";
-#print "about to call urec. \n";
-# print STDERR "about to call urec, with gene tree newick:\n$gene_newick_string \n$species_newick_string \n";
+# using urec, find the node s.t. rooting on its branch gives minimal duplications and losses
+# w.r.t. a species tree
+sub find_mindl_node {
+ my $gene_tree = shift; # a rooted gene tree
+ my $species_tree = shift; # a species tree
+# print STDERR "0 BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $gene_tree->get_leaf_list() ) ), "\n";
+
+ # for($gene_tree->get_leaf_list()){ print STDERR "genetree sbts: ", $_->get_attribute('species_bit_pattern'), "\n"; }
+ # print STDERR "gtnewick: ", $gene_tree->generate_newick(), "\n";
+
+ my $gene_tree_copy = $gene_tree->copy();
+ # print STDERR "1 BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $gene_tree_copy->get_leaf_list() ) ),
+# "\n";
+
+# for($gene_tree->get_leaf_list()){
+# print STDERR $_->get_name(), " ", $_->get_species(), " ",
+# $_->get_attribute('species_bit_pattern'), "\n";
+# }
+# print STDERR "gtcopynewick before pruning: ", $gene_tree_copy->generate_newick(), "\n";
+
+ my @non_speciestree_leafnodes = values %{ $gene_tree_copy->non_speciestree_leafnode_names() };
+# print STDERR "nonspeciestreeleafnodes: \n", join("\n", map($_->get_name() . "--" . $_->get_species(), @non_speciestree_leafnodes)), "\n";
+
+ $gene_tree_copy->prune_leaves(@non_speciestree_leafnodes);
+ my $n_leaves_after_pruning = scalar $gene_tree_copy->get_leaf_list();
+ if ( $n_leaves_after_pruning < 2 ) {
+ warn "Problem with mindl rerooting. Only: ", $n_leaves_after_pruning, " leaves after pruning.\n Tree: ",
+ $gene_tree_copy->generate_newick(), "\n";
+ return ( undef, undef );
+ }
+ # print STDERR "gt newick after pruning: ", $gene_tree_copy->generate_newick(), "\n";
+ # urec requires binary tree - make sure the tree is binary
+ # if polytomy at root, reroot a bit down one branch, to get binary root (if was tritomy)
+ my @new_root_point;
+
+ # print STDERR "pruned copy newick: ", $gene_tree_copy->generate_newick(), "\n";
+ {
+ my @root_children = $gene_tree_copy->get_root()->get_children();
+ if ( scalar @root_children != 2 ) {
+
+ @new_root_point = ( $root_children[0], 0.9 * $root_children[0]->get_branch_length() );
+ $gene_tree_copy->reset_root_to_point_on_branch(@new_root_point);
+ }
+ }
+
+ # binarify every non-binary node. At present doesn't attempt to choose in a smart way
+ # among the various possible resolutions
+ $gene_tree_copy->make_binary( $gene_tree_copy->get_root() ); # urec requires binary tree.
+
+ my $store_show_std_species = $gene_tree_copy->get_show_standard_species();
+
+ # put the trees into form of newick strings with no whitespace, so urec will be happy
+ $gene_tree_copy->show_newick_attribute("species");
+ $gene_tree_copy->set_show_standard_species(1);
+
+ # make sure node names start with alphabetic char to make urec happy.
+ $gene_tree_copy->make_names_urec_ok();
+
+ # need to redo node implicit names here.
+ my $gene_newick_string = $gene_tree_copy->generate_newick();
+ $gene_newick_string =~ s/\s//g;
+
+ $species_tree->show_newick_attribute("species");
+ $species_tree->set_show_standard_species(1);
+
+ my $species_newick_string = $species_tree->generate_newick();
+ $species_newick_string =~ s/\s//g; # remove whitespace
+
+ # print STDERR "gene tree: $gene_newick_string \n";
+ # print STDERR "species tree: $species_newick_string \n";
+
# my $rerooted_newick = `/home/tomfy/cxgn/cxgn-corelibs/lib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
-# my $rerooted_newick = `/data/local/cxgn/core/perllib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
-
- my $rerooted_newick;
-# my $urec_dir = `which urec`;
-# if($urec_dir =~ /\S/){
- if(`which urec` =~ /\S/){
- $rerooted_newick = `urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
-}else{
- $rerooted_newick = `/data/local/cxgn-old/core/perllib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
-}
-
- # print STDERR "gene_newick_string: \n $gene_newick_string \n\nspecies_newick_string: \n $species_newick_string.\n\n";
-# print STDERR "Rerooted newick string: [$rerooted_newick].\n";
-
-# print "parsing mindl rerooted gene tree in Tree\n";
- my $minDL_rerooted_gene_tree = (CXGN::Phylo::Parse_newick->new($rerooted_newick, $do_parse_set_error))->parse(); # this is now rooted so as to minimize gene duplication and loss needed to reconcile with species tree,
- # but branch lengthswill be wrong for nodes whose parent has changed in the rerooting (they are just the branch lengths to the old parents).
- $minDL_rerooted_gene_tree->get_root()->recursive_implicit_names();
-
- # $minDL_rerooted_gene_tree should have 2 children and (at least) one should have it's subtree also present in the pre-rerooting tree.
- # identify the node at the root of this subtree (using implicit names) and reroot there.
- # Have to do this because some branch length info was lost in urec step.
-# warn "minDL_rerooted_gene_tree newick: \n", $minDL_rerooted_gene_tree->generate_newick(), "\n";
- my @root_children = $minDL_rerooted_gene_tree->get_root()->get_children();
- my ($node_key, $rr_node);
- foreach (@root_children) {
- my $implicit_name_string = join("\t", @{$_->get_implicit_names()});
- # warn "implicit name string: $implicit_name_string \n";
- ($node_key, $rr_node) = $gene_tree->node_from_implicit_name_string($implicit_name_string);
- if (defined $rr_node) {
- $minDL_rerooted_gene_tree->decircularize();
- # debug ("Reroot above this node: $implicit_name_string \n");
-# warn "branch length along which to reroot: ", $rr_node->get_branch_length(), "\n";
- return @new_root_point = ($rr_node, 0.5*($rr_node->get_branch_length()));
- }
- }
-$| = 1;
-
- warn "problem with mindl rerooting. minDL_rerooted_gene_tree newick: \n",
- $minDL_rerooted_gene_tree->generate_newick(), "\n";
- die "find_mindl_node failed. \n";
-
-# $gene_tree->set_shown_standard_species($store_show_standard_species);
-#$gene_tree->update_label_names();
- return (undef, undef);
-}
-
-sub get_species_bithash{ #get a hash giving a bit pattern for each species in both $gene_tree and $spec_tree
- my $gene_tree = shift;
- my $spec_tree = shift;
- my $bithash = {};
- my %genehash;
- my %spechash;
- $spec_tree->show_newick_attribute("species");
- my $stree_newick = $spec_tree->generate_newick();
-# print STDERR "SPECIES TREE: $stree_newick \n";
- my @leaf_list = $gene_tree->get_leaf_list();
- foreach (@leaf_list) {
- my $lspecies = $_->get_standard_species();
- print STDERR "In Tree::get_species_bithash; gtree species: $lspecies \n";
- $genehash{$lspecies}++; # keys are species in gene tree, values are number of leaves with that species
- }
- @leaf_list = $spec_tree->get_leaf_list();
- foreach (@leaf_list) {
- my $lspecies = $_->get_standard_species();
- # print STDERR "stree species, raw, std: ", $_->get_standard_species(), " $lspecies \n";
- if ($genehash{$lspecies}) {
- $spechash{$lspecies}++; # keys are species in both trees.
- }
- }
- my @species_list = sort (keys %spechash);
- # print join(" ", @species_list), "\n";
- my $bits = 1;
- foreach (@species_list) {
- $bithash->{$_} = $bits;
- $bits = $bits << 1; # mult by two
- # print "$_, $bits \n";
- }
-
- return $bithash;
-}
-
-sub prune_non{ # prune leaves from tree 1 if their species does not occur in tree2
+
+ my $urec_cmd = '/data/prod/bin/urec';
+ $urec_cmd = 'urec' if ( !-x $urec_cmd );
+
+ # print STDERR `which $urec_cmd`;
+ my $rerooted_newick = `$urec_cmd -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
+ # print STDERR "in mindl rerooted newick: [$rerooted_newick] \n";
+ my $minDL_rerooted_gene_tree = ( CXGN::Phylo::Parse_newick->new( $rerooted_newick, $do_parse_set_error ) )->parse();
+
+# $minDL_rerooted_gene_tree is now rooted so as to minimize gene duplication and loss needed to reconcile with species tree,
+# but branch lengths will be wrong for nodes whose parent has changed in the rerooting (they are just the branch lengths to the old parents).
+ $minDL_rerooted_gene_tree->get_root()->recursive_implicit_names();
+
+ # the root of $minDL_rerooted_gene_tree should have 2 children
+ # and (at least) one hould have it's subtree also present in the pre-rerooting tree.
+ # identify the node at the root of this subtree (using implicit names) and reroot there.
+ # Have to do this because some branch length info was lost in urec step.
+
+ my @root_children = $minDL_rerooted_gene_tree->get_root()->get_children();
+ my ( $node_key, $rr_node );
+ foreach (@root_children) {
+ my $implicit_name_string = join( "\t", @{ $_->get_implicit_names() } );
+ ( $node_key, $rr_node ) = $gene_tree_copy->node_from_implicit_name_string($implicit_name_string);
+ if ( defined $rr_node ) {
+ $minDL_rerooted_gene_tree->decircularize();
+ $gene_tree_copy->decircularize();
+ my $rr_node_in_orig_gene_tree = $gene_tree->get_node($node_key);
+ return @new_root_point =
+ ( $rr_node_in_orig_gene_tree, 0.5 * ( $rr_node_in_orig_gene_tree->get_branch_length() ) );
+
+ }
+ }
+ $| = 1;
+
+ warn "problem with mindl rerooting. minDL_rerooted_gene_tree newick: \n",
+ $minDL_rerooted_gene_tree->generate_newick(), "\n";
+ die "find_mindl_node failed. \n";
+
+ return ( undef, undef );
+}
+
+sub get_species_bithash { #get a hash giving a bit pattern for each species in BOTH $gene_tree and $spec_tree
+ my $gene_tree = shift;
+ my $spec_tree = shift;
+ my $bithash = {};
+ my %genehash;
+ my %spechash;
+ $spec_tree->show_newick_attribute("species");
+ my $stree_newick = $spec_tree->generate_newick();
+
+ # count number of gene tree leaves of each species (use standard species)
+ my @leaf_list = $gene_tree->get_leaf_list();
+ foreach (@leaf_list) {
+
+ # print STDERR $_->get_name(), " ", $_->get_species(), " ", $_->get_standard_species(), "\n";
+ my $lspecies = $_->get_standard_species(); # gene tree leaf standard species
+ # print STDERR "In Tree::get_species_bithash; gtree species: $lspecies \n";
+ $genehash{$lspecies}++; # keys are species in gene tree, values are number of leaves with that species
+ }
+
+ # count number of species tree leaves of each species (use standard species)
+ @leaf_list = $spec_tree->get_leaf_list();
+ foreach (@leaf_list) {
+ my $lspecies = $_->get_standard_species();
+
+ # print STDERR "stree species, raw, std: ", $_->get_standard_species(), " $lspecies \n";
+ if ( $genehash{$lspecies} ) {
+ $spechash{$lspecies}++; # keys are species in both trees.
+ }
+ }
+ my @species_list = sort ( keys %spechash );
+
+ # assign 1,2,4,8, etc. to the various species (only the species in both gene and species trees)
+ my $bits = Math::BigInt->new(1);
+ foreach (@species_list) {
+ $bithash->{$_} = $bits;
+ $bits = $bits << 1; # mult by two
+ }
+
+ return $bithash;
+}
+
+
+# returns 1 if can't delete one of the leaf nodes in arg list, 0 if OK.
+sub prune_leaves { # prune leaves from tree 1 if their species does not occur in tree2
+ my $self = shift;
+ my @leaves = @_;
+
+ my $retval = 0;
+ foreach my $leaf_node (@leaves) {
+ if ( !$leaf_node ) {
+ warn 'The node you want to delete does not exist!';
+ return;
+ }
+ my $parent_of_pruned_node = $leaf_node->get_parent();
+ $retval ||= $leaf_node->delete_self();
+ $parent_of_pruned_node->recursive_collapse_single_nodes(); # so if e.g. you delete two neighbor leaves, their parent is also deleted
+ }
+
+ $self->collapse_tree();
+# print STDERR "Y BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n";
+ return $retval;
+}
+
+# returns a ref to a hash whose keys are names of leaf nodes, values are refs to arrays of names of other
+# leaf nodes inferred to be orthologs
+sub leaf_ortholog_table{
+my $self = shift;
+my $hilited_only = shift || undef;
+my $query_species = shift || undef;
+my $query_id_regex = shift || undef;
+my $standard_query_species = undef;
+if (defined $query_species) {
+ $standard_query_species = $self->get_species_standardizer()->get_standard_name($query_species); #e.g. "Ipomoea_batatas";
+ }
+my %leaf_ortholog_hash = ();
+ my @leaves = $self->get_leaves();
+ my $non_species_tree_leaf_node_names = $self->non_speciestree_leafnode_names();
+ foreach my $leaf (@leaves) {
+# print STDERR $leaf->get_name(), " [", $leaf->get_label()->get_hilite(), "]\n";
+ next if($hilited_only && !$leaf->get_label()->get_hilite());
+ my $species_ok = (! defined $query_species or ( $leaf->get_standard_species() =~ /$standard_query_species/) );
+ my $name_ok = (! defined $query_id_regex or $leaf->get_name() =~ /$query_id_regex/);
+ next if(!$species_ok or !$name_ok);
+ my $leafname = $leaf->get_name();
+
+ next if( exists $non_species_tree_leaf_node_names->{$leafname} );
+ # $ortholog_str .= "orthologs of " . $leafname . ": ";
+
+ my @cand_orthologs = $leaf->collect_orthologs_of_leaf();
+
+ # keep only leaves whose species appear in species tree
+ my @orthologs = ();
+
+ # my $non_species_tree_leaf_node_names =
+ # $browser->get_tree()->non_speciestree_leafnode_names();
+ if ( scalar keys %$non_species_tree_leaf_node_names > 0 ) {
+ foreach (@cand_orthologs) {
+ if ( exists $non_species_tree_leaf_node_names->{$_} ) {
+ # unknown species, can't claim orthology
+ } else {
+ push @orthologs, $_;
+ }
+ }
+ } else {
+ @orthologs = @cand_orthologs;
+ }
+
+ # my @orthologs = $leaf->collect_orthologs_of_leaf(); # list of leaf names
+ # $ortholog_str .= join(" ", @orthologs) . "\n";
+ $leaf_ortholog_hash{$leafname} = \@orthologs;
+ } # loop over leaves
+return \%leaf_ortholog_hash;
+
}
# return a hash whose keys are leaf node names (hidden nodes excluded)
# and whose values are refs to arrays of 1's and 0's, the 1's indicating orthology.
-sub ortho_matrix_hash{
- my $self = shift;
- my @leaf_names = ();
- for ($self->get_leaves()) {
- next if($_->get_hide_label()); # do not include hidden labels
- push @leaf_names, $_->get_name();
- }
- @leaf_names = sort @leaf_names;
-# print STDERR join(" ", @leaf_names), "\n";
- my $n_leaves = scalar @leaf_names;
- my %name_hash;
- my %ortho_hash;
-
- my $i = 0;
- foreach (@leaf_names) {
- $name_hash{$_} = $i;
- my @zeroes = (0)x$n_leaves;
- $ortho_hash{$_} = \@zeroes;
- $i++;
- }
- my @leaves = $self->get_leaves();
- foreach (@leaves) {
- my $name = $_->get_name();
- my $o_array = $ortho_hash{$name};
- # print STDERR join(" ", @$o_array), "\n";
- my @orthologs = $_->collect_orthologs_of_leaf();
- foreach (@orthologs) {
- my $o_name = $_; #->get_name();
- # print STDERR $o_name, " ", $name_hash{$o_name}, "\n";
- $o_array->[$name_hash{$o_name}] = 1; # in the array for $name set the right element to 1
- }
- }
-# foreach (@leaf_names) {
-# my $ortho_array_ref = $ortho_hash{$_};
-# printf STDERR ("%50s ", $_); print STDERR join(" ", @$ortho_array_ref), "\n";
-# }
- return \%ortho_hash;
+sub ortho_matrix_hash {
+ my $self = shift;
+ my @leaf_names = ();
+ for ( $self->get_leaves() ) {
+ next if ( $_->get_hide_label() ); # do not include hidden labels
+ push @leaf_names, $_->get_name();
+ }
+ @leaf_names = sort @leaf_names;
+
+ # print STDERR join(" ", @leaf_names), "\n";
+ my $n_leaves = scalar @leaf_names;
+ my %name_hash;
+ my %ortho_hash;
+
+ my $i = 0;
+ foreach (@leaf_names) {
+ $name_hash{$_} = $i;
+ my @zeroes = (0) x $n_leaves;
+ $ortho_hash{$_} = \@zeroes;
+ $i++;
+ }
+ my @leaves = $self->get_leaves();
+ foreach (@leaves) {
+ my $name = $_->get_name();
+ my $o_array = $ortho_hash{$name};
+
+ # print STDERR join(" ", @$o_array), "\n";
+ my @orthologs = ();
+ my @cand_orthologs = $_->collect_orthologs_of_leaf();
+
+ # keep only leaves whose species appear in species tree
+ my $non_species_tree_leaf_node_names = $self->non_speciestree_leafnode_names();
+ if ( scalar keys %$non_species_tree_leaf_node_names > 0 ) {
+ foreach (@cand_orthologs) {
+ next if ( exists $non_species_tree_leaf_node_names->{$_} );
+ push @orthologs, $_;
+ }
+ } else {
+ @orthologs = @cand_orthologs;
+ }
+ foreach (@orthologs) {
+ my $o_name = $_; #->get_name();
+ # print STDERR $o_name, " ", $name_hash{$o_name}, "\n";
+ $o_array->[ $name_hash{$o_name} ] = 1; # in the array for $name set the right element to 1
+ }
+ }
+
+ # foreach (@leaf_names) {
+ # my $ortho_array_ref = $or tho_hash{$_};
+ # printf STDERR ("%50s ", $_); print STDERR join(" ", @$ortho_array_ref), "\n";
+ # }
+ return \%ortho_hash;
+}
+
+# return a hashref whose keys are the names of the leaf nodes
+# whose species don't appear in the species tree.
+# keys: names, values: node objects
+# must have already set the species bit patterns for each node, using
+sub non_speciestree_leafnode_names {
+ my $self = shift;
+ my @leaves = $self->get_leaf_list();
+ my %non_species_tree_leaf_node_names = ();
+ for (@leaves) {
+ if ( $_->get_attribute("species_bit_pattern") == 0 ) {
+ $non_species_tree_leaf_node_names{ $_->get_name() } = $_; # keys: names, values: node objects
+ }
+ }
+ return \%non_species_tree_leaf_node_names;
}
-sub set_branch_lengths_equal{
-my $self = shift;
-my $bl = shift || 1.0;
-$self->get_root()->recursive_set_branch_length($bl);
+sub set_branch_lengths_equal {
+ my $self = shift;
+ my $bl = shift || 1.0;
+ $self->get_root()->recursive_set_branch_length($bl);
}
-sub decircularize{
-my $self = shift;
-$self->get_root()->recursive_decircularize();
-$self->set_renderer(undef);
-$self->set_layout(undef);
+sub decircularize {
+ my $self = shift;
+ $self->get_root()->recursive_decircularize();
+ $self->set_renderer(undef);
+ $self->set_layout(undef);
}
sub DESTROY {
- my $self = shift;
-# $self->decircularize();
+ my $self = shift;
+
+ # $self->decircularize();
# check for an overridden destructor...
$self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
+
# now do your own thing before or after
}
diff --git a/t/CXGN/Phylo/orthologger.t b/t/CXGN/Phylo/orthologger.t
new file mode 100644
index 00000000..efe15469
--- /dev/null
+++ b/t/CXGN/Phylo/orthologger.t
@@ -0,0 +1,153 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings FATAL => 'all';
+
+# tests for Orthologger Module/
+use Test::More tests => 10;
+
+#use lib '/home/tomfy/cxgn/cxgn-corelibs/lib';
+#use lib '/home/tomfy/Orthologger/lib';
+
+use CXGN::Phylo::Parser;
+use CXGN::Phylo::Orthologger;
+
+
+# This is family 2830, with 25 genes in 13 taxa.
+my $gene_tree_newick =
+'(((Solyc03g121130.2.1[species=tomato]:0.03887,(((POPTR_0014s08830.1[species=poplar]:0.0001,POPTR_0014s08830.2[species=poplar]:0.0001):0.05550,(X_30131.m006857[species=castorbean]:0.01095,X_30131.m007044[species=castorbean]:0.00016):0.02176):0.04630,(evm.model.supercontig_184.11[species=papaya]:0.04198,(AT2G46230.1[species=arabidopsis]:0.01153,AT2G46230.2[species=arabidopsis]:0.07275):0.15150):0.02371):0.01878):0.01095,((GSVIVT01026915001[species=grape]:0.0001,GSVIVT01027024001[species=grape]:0.0001):0.07067,(((Glyma12g31740.1[species=soybean]:0.0001,Glyma13g38690.1[species=soybean]:0.0001):0.0001,(Glyma13g38690.2[species=soybean]:0.0001,Glyma13g38690.3[species=soybean]:0.0001):0.0001):0.01992,(IMGA_Medtr6g086290.1[species=medicago]:0.02191,IMGA_Medtr7g116720.1[species=medicago]:0.00015):0.06024):0.05419):0.01302):0.0148619097222222,((Bradi1g48740.1[species=brachypodium]:0.03551,(((Sb10g004030.1[species=sorghum]:0.0001,GRMZM2G140689_P02[species=maize]:0.0001):0.00015,(GRMZM2G016330_P01[species=maize]:0.0001,GRMZM2G016330_P03[species=maize]:0.0001):0.00536):0.02677,(LOC_Os06g06410.1[species=rice]:0.01662,LOC_Os06g06410.2[species=rice]:0.00015):0.00288):0.02333):0.08447,(jgi_Selmo1_91292[species=selaginella]:0.00532,jgi_Selmo1_158231[species=selaginella]:0.00016):0.13724):0.00183809027777778)';
+
+# $gene_tree_newick =~ s/\s*//g; # remove whitespace
+
+my $species_tree_newick =
+'(Selaginella[species=Selaginella]:1,(((sorghum[species=Sorghum_bicolor]:1,maize[species=Zea_mays]:1):1,(rice[species=Oryza_sativa]:1,brachypodium[species=Brachypodium_distachyon]:1):1):1,(tomato[species=Solanum_lycopersicum]:1,(grape[species=Vitis_vinifera]:1,((papaya[species=Carica_papaya]:1,arabidopsis[species=Arabidopsis_thaliana]:1):1,((soy[species=Glycine_max]:1,medicago[species=Medicago_truncatula]:1):1,(castorbean[species=Ricinus_communis]:1,Poplar[species=Populus_trichocarpa]:1):1):1):1):1):1):1)';
+
+$species_tree_newick =
+'( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella[species=Selaginella_moellendorffii]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1 ):1, ( columbine[species=Aquilegia_coerulea]:1, ( ( ( ( ( ( ( ( ( ( tomato[species=Solanum_lycopersicum]:1, potato[species=Solanum_tuberosum]:1 ):1, eggplant[species=Solanum_melongena]:1 ):1, pepper[species=Capsicum_annuum]:1 ):1, tobacco[species=Nicotiana_tabacum]:1 ):1, petunia[species=Petunia]:1 ):1, sweet_potato[species=Ipomoea_batatas]:1 ):1, ( arabica_coffee[species=Coffea_arabica]:1, robusta_coffee[species=Coffea_canephora]:1 ):1 ):1, snapdragon[species=Antirrhinum]:1 ):1, ( ( sunflower[species=Helianthus_annuus]:1, lettuce[species=Lactuca_sativa]:1 ):1, carrot[species=Daucus_carota]:1 ):1 ):1, ( grape[species=Vitis_vinifera]:1, ( ( eucalyptus[species=Eucalyptus_grandis]:1, ( ( orange[species=Citrus_sinensis]:1, clementine[species=Citrus_clementina]:1 ):1, ( ( cacao[species=Theobroma_cacao]:1, cotton[species=Gossypium_raimondii]:1 ):1, ( papaya[species=Carica_papaya]:1, ( turnip[species=Brassica_rapa]:1, ( salt_cress[species=Thellungiella_parvula]:1, ( red_shepherds_purse[species=Capsella_rubella]:1, ( arabidopsis_thaliana[species=Arabidopsis_thaliana]:1, arabidopsis_lyrata[species=Arabidopsis_lyrata]:1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1, ( ( ( peanut[species=Arachis_hypogaea]:1, ( ( soy[species=Glycine_max]:1, pigeon_pea[species=Cajanus_cajan]:1 ):1, ( medicago[species=Medicago_truncatula]:1, lotus[species=Lotus_japonicus]:1 ):1 ):1 ):1, ( hemp[species=Cannabis_sativa]:1, ( ( ( apple[species=Malus_domestica]:1, peach[species=Prunus_persica]:1 ):1, woodland_strawberry[species=Fragaria_vesca]:1 ):1, cucumber[species=Cucumis_sativus]:1 ):1 ):1 ):1, ( ( castorbean[species=Ricinus_communis]:1, cassava[species=Manihot_esculenta]:1 ):1, ( poplar[species=Populus_trichocarpa]:1, flax[species=Linum_usitatissimum]:1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ) :1';
+
+# $species_tree_newick =~ s/\s*//g; # remove whitespace
+
+my $gt_parser = CXGN::Phylo::Parse_newick->new($gene_tree_newick);
+my $gene_tree = $gt_parser->parse();
+
+my $st_parser = CXGN::Phylo::Parse_newick->new($species_tree_newick);
+my $species_tree = $st_parser->parse();
+
+#********* test Orthologger->new returns Orthologger obj. **********
+my $Orthologger_obj =
+ CXGN::Phylo::Orthologger->new( { 'gene_tree' => $gene_tree, 'species_tree' => $species_tree, 'reroot_method' => 'mindl' } );
+ok( defined $Orthologger_obj, 'new() returned something.' );
+isa_ok( $Orthologger_obj, 'CXGN::Phylo::Orthologger' );
+
+#********* test mindl rerooting of tree ***************
+$Orthologger_obj->get_gene_tree()->show_newick_attribute('species');
+my $got_rerooted_tree = $Orthologger_obj->get_gene_tree()->generate_newick();
+
+my $expected_rerooted_tree =
+'((jgi_Selmo1_91292[species=Selaginella_moellendorffii]:0.00532,jgi_Selmo1_158231[species=Selaginella_moellendorffii]:0.00016)[speciation=0]:0.06862,((Bradi1g48740.1[species=Brachypodium_distachyon]:0.03551,(((Sb10g004030.1[species=Sorghum_bicolor]:0.0001,GRMZM2G140689_P02[species=Zea_mays]:0.0001)[speciation=1]:0.00015,(GRMZM2G016330_P01[species=Zea_mays]:0.0001,GRMZM2G016330_P03[species=Zea_mays]:0.0001)[speciation=0]:0.00536)[speciation=0]:0.02677,(LOC_Os06g06410.1[species=Oryza_sativa]:0.01662,LOC_Os06g06410.2[species=Oryza_sativa]:0.00015)[speciation=0]:0.00288)[speciation=1]:0.02333)[speciation=0]:0.08447,((Solyc03g121130.2.1[species=Solanum_lycopersicum]:0.03887,(((POPTR_0014s08830.1[species=Populus_trichocarpa]:0.0001,POPTR_0014s08830.2[species=Populus_trichocarpa]:0.0001)[speciation=0]:0.05550,(X_30131.m006857[species=Ricinus_communis]:0.01095,X_30131.m007044[species=Ricinus_communis]:0.00016)[speciation=0]:0.02176)[speciation=1]:0.04630,(evm.model.supercontig_184.11[species=Carica_papaya]:0.04198,(AT2G46230.1[species=Arabidopsis_thaliana]:0.01153,AT2G46230.2[species=Arabidopsis_thaliana]:0.07275)[speciation=0]:0.15150)[speciation=1]:0.02371)[speciation=1]:0.01878)[speciation=1]:0.01095,((GSVIVT01026915001[species=Vitis_vinifera]:0.0001,GSVIVT01027024001[species=Vitis_vinifera]:0.0001)[speciation=0]:0.07067,(((Glyma12g31740.1[species=Glycine_max]:0.0001,Glyma13g38690.1[species=Glycine_max]:0.0001)[speciation=0]:0.0001,(Glyma13g38690.2[species=Glycine_max]:0.0001,Glyma13g38690.3[species=Glycine_max]:0.0001)[speciation=0]:0.0001)[speciation=0]:0.01992,(IMGA_Medtr6g086290.1[species=Medicago_truncatula]:0.02191,IMGA_Medtr7g116720.1[species=Medicago_truncatula]:0.00015)[speciation=0]:0.06024)[speciation=1]:0.05419)[speciation=1]:0.01302)[speciation=0]:0.0167)[speciation=1]:0.06862)';
+
+is( $got_rerooted_tree, $expected_rerooted_tree, "Check rerooted tree newick is as expected." );
+
+#*********** test ortholog result string is as expected **********
+my $ortholog_result_string = $Orthologger_obj->ortholog_result_string();
+my $expected_ortholog_result_string =
+'orthologs of jgi_Selmo1_91292: AT2G46230.1 AT2G46230.2 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 LOC_Os06g06410.1 LOC_Os06g06410.2 POPTR_0014s08830.1 POPTR_0014s08830.2 Sb10g004030.1 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11
+orthologs of jgi_Selmo1_158231: AT2G46230.1 AT2G46230.2 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 LOC_Os06g06410.1 LOC_Os06g06410.2 POPTR_0014s08830.1 POPTR_0014s08830.2 Sb10g004030.1 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11
+orthologs of Bradi1g48740.1: AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of Sb10g004030.1: GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of GRMZM2G140689_P02: Sb10g004030.1 LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of GRMZM2G016330_P01: LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of GRMZM2G016330_P03: LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of LOC_Os06g06410.1: GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 Sb10g004030.1 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of LOC_Os06g06410.2: GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 Sb10g004030.1 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of Solyc03g121130.2.1: AT2G46230.1 AT2G46230.2 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of POPTR_0014s08830.1: X_30131.m006857 X_30131.m007044 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of POPTR_0014s08830.2: X_30131.m006857 X_30131.m007044 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of X_30131.m006857: POPTR_0014s08830.1 POPTR_0014s08830.2 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of X_30131.m007044: POPTR_0014s08830.1 POPTR_0014s08830.2 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of evm.model.supercontig_184.11: AT2G46230.1 AT2G46230.2 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of AT2G46230.1: evm.model.supercontig_184.11 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of AT2G46230.2: evm.model.supercontig_184.11 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of GSVIVT01026915001: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of GSVIVT01027024001: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of Glyma12g31740.1: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of Glyma13g38690.1: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of Glyma13g38690.2: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of Glyma13g38690.3: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of IMGA_Medtr6g086290.1: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+orthologs of IMGA_Medtr7g116720.1: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292
+Leaves not in species tree: ';
+
+my @ortholog_lines = split( "\n", $ortholog_result_string );
+my @expected_ortholog_lines = split( "\n", $expected_ortholog_result_string );
+my ( $nlines, $nxlines ) = ( scalar @ortholog_lines, scalar @expected_ortholog_lines );
+is( $nlines, $nxlines, "Check number of output lines agrees with expectation: $nlines, $nxlines." );
+
+my @sorted_ortholog_lines = sort @ortholog_lines;
+my @sorted_expected_ortholog_lines = sort @expected_ortholog_lines;
+$ortholog_result_string = join( "\n", @sorted_ortholog_lines );
+$expected_ortholog_result_string = join( "\n", @sorted_expected_ortholog_lines );
+
+#$ortholog_result_string =~ s/ +/ /g;
+#$ortholog_result_string =~ s/ *\n */\n/g;
+#$expected_ortholog_result_string =~ s/ +/ /g;
+#$expected_ortholog_result_string =~ s/ *\n */\n/g;
+is( $ortholog_result_string, $expected_ortholog_result_string,
+ 'Check ortholog result string agrees with expectation.' );
+
+$gene_tree_newick =
+'((foxtail_millet[species=Setaria_italica]:1,(sorghum[species=Sorghum_bicolor]:1,maize[species=Zea_mays_x]:1):1):1,(rice[species=Oryza_sativa]:1,brachypodium[species=Brachypodium_distachyon]:1):1)';
+
+# the following is a misrooted tree
+$gene_tree_newick = '(
+ (
+ foxtail_millet_1[species=Setaria_italica]:1,
+ (
+ rice_1[species=Oryza_sativa]:1,
+ brachypodium_1[species=Brachypodium_distachyon]:1
+ ):1
+ ):0.5,
+ (
+ sorghum_1[species=Sorghum_bicolor]:1,
+ maize_1[species=Zea_mays_x]:1
+ ):0.5
+)';
+
+$gt_parser = CXGN::Phylo::Parse_newick->new($gene_tree_newick);
+$gene_tree = $gt_parser->parse();
+
+$st_parser = CXGN::Phylo::Parse_newick->new($species_tree_newick);
+$species_tree = $st_parser->parse();
+
+#********* test Orthologger->new returns Orthologger obj. **********
+$Orthologger_obj =
+ CXGN::Phylo::Orthologger->new( { 'gene_tree' => $gene_tree, 'species_tree' => $species_tree, 'reroot_method' => 'mindl' } );
+ok( defined $Orthologger_obj, 'new() returned something.' );
+isa_ok( $Orthologger_obj, 'CXGN::Phylo::Orthologger' );
+
+#********* test mindl rerooting of tree ***************
+$Orthologger_obj->get_gene_tree()->show_newick_attribute('species');
+$got_rerooted_tree = $Orthologger_obj->get_gene_tree()->generate_newick();
+$expected_rerooted_tree =
+'((rice_1[species=Oryza_sativa]:1,brachypodium_1[species=Brachypodium_distachyon]:1)[speciation=1]:0.5,(foxtail_millet_1[species=Setaria_italica]:1,(sorghum_1[species=Sorghum_bicolor]:1,maize_1[species=Zea_mays_x]:1)[speciation=0]:1)[speciation=1]:0.5)';
+is( $got_rerooted_tree, $expected_rerooted_tree, "Check rerooted tree newick is as expected." );
+$ortholog_result_string = $Orthologger_obj->ortholog_result_string();
+
+$expected_ortholog_result_string = 'orthologs of rice_1: brachypodium_1 foxtail_millet_1 sorghum_1
+orthologs of brachypodium_1: rice_1 foxtail_millet_1 sorghum_1
+orthologs of foxtail_millet_1: sorghum_1 brachypodium_1 rice_1
+orthologs of sorghum_1: foxtail_millet_1 brachypodium_1 rice_1
+Leaves not in species tree: maize_1';
+
+@ortholog_lines = split( "\n", $ortholog_result_string );
+@expected_ortholog_lines = split( "\n", $expected_ortholog_result_string );
+( $nlines, $nxlines ) = ( scalar @ortholog_lines, scalar @expected_ortholog_lines );
+is( $nlines, $nxlines, "Check number of output lines agrees with expectation: $nlines, $nxlines." );
+
+@sorted_ortholog_lines = sort @ortholog_lines;
+@sorted_expected_ortholog_lines = sort @expected_ortholog_lines;
+$ortholog_result_string = join( "\n", @sorted_ortholog_lines );
+$expected_ortholog_result_string = join( "\n", @sorted_expected_ortholog_lines );
+
+is( $ortholog_result_string, $expected_ortholog_result_string,
+ 'Check ortholog result string agrees with expectation.' );
+
diff --git a/t/CXGN/Phylo/phylo.t b/t/CXGN/Phylo/phylo.t
index fff63de2..d0c6ebb4 100755
--- a/t/CXGN/Phylo/phylo.t
+++ b/t/CXGN/Phylo/phylo.t
@@ -1,7 +1,8 @@
#!/usr/bin/perl
-use Test::Most qw/no_plan/;
+use Test::Most tests => 52; # qw/no_plan/;
use Modern::Perl;
+# tests Parser, Tree, and Node modules.
use CXGN::Phylo::Tree;
use CXGN::Phylo::Node;
@@ -12,14 +13,14 @@ use Carp;
# expression to test the Phylo packages with
#
-my $expression = "(1:0.082376,(2:0.196674,((3:0.038209,6:0.354293):0.026742,5:0.094338):0.064142):0.067562,4:0.295612)";
-
-my $parser = CXGN::Phylo::Parse_newick -> new($expression);
+my $newick_expression =
+ "(1:0.082376,(2:0.196674,((3:0.038209,6:0.354293):0.026742,5:0.094338):0.064142):0.067562,4:0.295612)";
+my $parser = CXGN::Phylo::Parse_newick -> new($newick_expression);
# test tokenizer
#
-my @tokens = $parser -> tokenize($expression);
-print STDERR "\tTOKENS: ".join("|", @tokens)."\n";
+my @tokens = $parser -> tokenize($newick_expression);
+# print STDERR "\tTOKENS: ".join("|", @tokens)."\n";
is (@tokens, 22, "Token count test");
my $tree = $parser-> parse();
@@ -45,7 +46,7 @@ my $n = $tree->get_node(5);
#$tree->get_root()->rotate_node();
#$n4->set_hidden(1);
-my $subtree_len = $tree->get_root()->calculate_subtree_distances();
+#my $subtree_len = $tree->get_root()->calculate_subtree_distances();
#is ($subtree_len, 12, "subtree length test");
# test the leaf functions in two different ways
@@ -63,6 +64,7 @@ is ($tree->get_root()->is_leaf, 0, "root leaf test");
is (@leaflist, 6, "leaf list test");
+
# test the root
#
my $root = $tree->get_root();
@@ -81,49 +83,59 @@ is ($inner_node_subnode_count, 6, "inner node subnode count test");
# set species information to test subtree_species_count stuff
#
-$tree->get_node(2)->set_species("coffee");
-$tree->get_node(4)->set_species("tomato");
-$tree->get_node(7)->set_species("potato");
-$tree->get_node(8)->set_species("pepper");
-$tree->get_node(9)->set_species("coffee");
-$tree->get_node(10)->set_species("tomato");
+my @species_list = ("coffee", "tomato", "potato", "pepper", "eggplant", "brachypodium");
+#my @node_list = values %{$tree->{node_hash}};
+my $i = 0;
+foreach my $n (values %{$tree->{node_hash}}) {
+ next if(scalar $n->get_children() > 0); # skip non-leaves
+ $n->set_species($species_list[$i % 6]);
+# print "i, species: $i ", $n->get_species(), "\n";
+ $i++;
+}
# test the subtree_species count functions
#$tree->get_root()->recursive_text_render();
-#exit;
-#
+#exit;#
$tree->get_root()->recursive_set_leaf_species_count();
#$tree->get_root()->calculate_subtree_species_count();
# pick out a node and test the count
#
-is ($tree->get_node(5)->get_attribute("leaf_species_count"), 3, "subtree leaf species count test");
+#print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n";
+is($tree->get_root()->get_attribute("leaf_species_count"), 6, "subtree leaf species count test");
+#is ($tree->get_node(5)->get_attribute("leaf_species_count"), 3, "subtree leaf species count test");
# test the remove_child function
#
+#print STDERR "before tree copy\n";
my $rm_tree = $tree->copy();
-$n = $rm_tree->get_node(5);
-my @children = $n->get_children();
-print STDERR "\tRemove child\nbefore: ".$n->to_string()."\n";
-is ($n->get_children, 2, "get_children test");
-print STDERR "\t(Removing child ".$children[0]->get_node_key().")\n";
-$n->remove_child($children[0]);
-is ($n->get_children(), 1, "remove child test");
-print STDERR "\tafter : ".$n->to_string()."\n";
+#print STDERR 'after $tree->copy() \n';
+my @root_children = $rm_tree->get_root()->get_children();
+my $n1 = $root_children[1];
+my @children =$n1->get_children();
+#print STDERR "\tRemove child\nbefore: ".$n->to_string()."\n";
+is ($n1->get_children, 2, "get_children test");
+#print STDERR "\t(Removing child ".$children[0]->get_node_key().")\n";
+$n1->remove_child($children[0]);
+is ($n1->get_children(), 1, "remove child test");
+#print STDERR "\tafter : ".$n1->to_string()."\n";
my @root_kids = $rm_tree->get_root()->get_children();
is (@root_kids, 3, "root children count test");
$rm_tree->get_root()->remove_child($root_kids[1]);
-print STDERR "\tRemoving child key=".($root_kids[1]->get_name())."\n";
+#print STDERR "\tRemoving child key=".($root_kids[1]->get_name())."\n";
#foreach my $c ($rm_tree->get_root()->get_children()) { print "current children = ".$c->get_name()."\n"; }
is ($rm_tree->get_root()->get_children(), 2, "removed one root child test");
# test reset_root
#
-my $nn = $tree->get_node(5);
-$tree->reset_root($nn);
+#print STDERR $tree->generate_newick();
+
+$tree->reset_root( $tree->get_node(5) );
+
+#print STDERR $tree->generate_newick();
# test the compare function
# initialize two identical trees and compare
@@ -159,41 +171,51 @@ is ($tree_a->compare_rooted($tree_b), 1, "tree topology specification test");
#
my $new_tree = $tree->copy();
if ($tree->compare_rooted($new_tree)) { # should be the same, shouldn't it?
- print STDERR "Compared tree to newtree and found them to be identical.\n";
+ # print STDERR "Compared tree to newtree and found them to be identical.\n";
}
else { print STDERR "newtree and tree are not identical. Oops.\n"; }
is ($new_tree->compare_rooted($tree), 1, "copied tree identity check");
isnt ( $new_tree, $tree, "tree pointer non-identity check");
+my ($rfd, $symd, $d3) = $tree->RF_distance($new_tree);
+is($rfd, 0, "check RF distance between tree and copy is 0.");
+is($symd, 0, "check RF distance between tree and copy is 0.");
+
# check if I can remove a node in new_tree without affecting $tree
#
+#print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n";
$new_tree->delete_node(3);
+
+
+# print $tree->generate_newick(), "\n";
+# print $new_tree->generate_newick(), "\n";
+
is($new_tree->compare_rooted($tree), 0, "changed copied tree identity check");
# test the collapsing function - test a tree with many nodes that
# have only one child.
#
-print STDERR "\tTesting CXGN::Phylo::Node::recursive_collapse_nodes\n";
+#print STDERR "\tTesting CXGN::Phylo::Node::recursive_collapse_nodes\n";
my $c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1)"))->parse();
$c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree));
-print STDERR "The original tree: \n";
-$c_tree->render();
-print STDERR "=====\n\n";
-# readline stdin;
+#print STDERR "The original tree: \n";
+#$c_tree->render();
+#print STDERR "=====\n\n";
is ($c_tree->get_all_nodes(), 6, "node count before collapse");
$c_tree->collapse_tree();
-print STDERR "The collapsed tree:\n";
-$c_tree->render();
-print STDERR "=====\n\n";
+#print STDERR "The collapsed tree:\n";
+#$c_tree->render();
+#print STDERR "=====\n\n";
is ($c_tree->get_all_nodes(), 3, "node count after collapse");
-$new_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($new_tree));
-$new_tree->render();
+#$new_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($new_tree));
+#$new_tree->render();
+
#if(1 || $c_tree->get_all_nodes() != 3){
#$c_tree->print_node_keys();
#$c_tree->get_root()->print_subtree();
@@ -203,23 +225,16 @@ $new_tree->render();
#
$c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse();
$c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree));
-print STDERR "Original tree\n";
-$c_tree->render();
$c_tree->collapse_tree();
-print STDERR "Collapsed tree:\n";
-$c_tree->render();
-print STDERR "=====\n\n";
# test a tree collapsing with a tree that has branch lengths of zero.
#
my $z_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:0)C:0)D:0)E:1, (((G:1, F:1)H:0)I:1)J:1)"))->parse();
-print STDERR "Testing the recursive_collapse_zero_branches() function...\nOriginal tree:\n";
-$z_tree->get_root()->recursive_text_render();
+#print STDERR "Testing the recursive_collapse_zero_branches() function...\nOriginal tree:\n";
+#$z_tree->get_root()->print_subtree();
+
my $z_tree_node_count = $z_tree->get_node_count();
$z_tree ->get_root()->recursive_collapse_zero_branches();
-print STDERR "New tree:\n";
-$z_tree->get_root()->recursive_text_render();
-print STDERR "====\n\n";
is ($z_tree->get_node_count(), $z_tree_node_count-4, "recursive_collapse_zero_nodes test");
@@ -230,7 +245,6 @@ print STDERR "\tDeleting internal node (key=4)...\n";
my $ind_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse();
my $ind_tree_copy = $ind_tree->copy();
$ind_tree->delete_node(4);
-$ind_tree->render();
is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+1), "node count after delete test");
is ($ind_tree->get_node(4), undef, "has node really disappeared test");
@@ -238,7 +252,7 @@ is ($ind_tree->get_node(4), undef, "has node really disappeared test");
#
print STDERR "\tDeleting a leaf node (key=2)...\n";
$ind_tree->delete_node(2);
-$ind_tree->render();
+#$ind_tree->render();
is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+2), "node count after leaf node deletion");
# test the newick generation from the node
@@ -292,13 +306,13 @@ isnt($binary_fail, 1, "Binary tree test: all children count <= 2");
# recover original tree. Check that rooted and unrooted compares both give 1.
# $tree = CXGN::Phylo::Parse_newick->new("(A:1, (B:1, C:1):1)")->parse();
-$expression = "(A:0.082376,(B:0.196674,((C:0.038209,F:0.354293):0.026742,E:0.094338):0.064142):0.067562,D:0.295612)";
-#my $expression = "(A:1,(B:1,((C:2,F:4):1,E:1):2.02):1,D:2)";
-#my $expression = "((A:1, D:2):1, (B:1, C:2, E:3):2)";
-#my $expression = "((A:0.89, D:1.2):1.4, (B:1, C:1.1, E:0.9):1)";
-#my $expression = "(C:1, D:3, (A:5, B:2): 1)";
-#my $expression = "(A:3, ((B:1, C:2):1.5):1)";
-$tree = CXGN::Phylo::Parse_newick->new($expression)->parse();
+$newick_expression = "(A:0.082376,(B:0.196674,((C:0.038209,F:0.354293):0.026742,E:0.094338):0.064142):0.067562,D:0.295612)";
+#my $newick_expression = "(A:1,(B:1,((C:2,F:4):1,E:1):2.02):1,D:2)";
+#my $newick_expression = "((A:1, D:2):1, (B:1, C:2, E:3):2)";
+#my $newick_expression = "((A:0.89, D:1.2):1.4, (B:1, C:1.1, E:0.9):1)";
+#my $newick_expression = "(C:1, D:3, (A:5, B:2): 1)";
+#my $newick_expression = "(A:3, ((B:1, C:2):1.5):1)";
+$tree = CXGN::Phylo::Parse_newick->new($newick_expression)->parse();
ok($tree->test_tree(), "tree test 1");
$tree->get_root()->recursive_collapse_single_nodes();
ok($tree->test_tree(), "tree test 2");
@@ -312,7 +326,7 @@ $tree->reset_root_to_point_on_branch($tree->min_leaf_dist_variance_point());
$tree->get_root()->recursive_implicit_names();
-$tree->get_root()->print_subtree("\n");
+# $tree->get_root()->print_subtree("\n");
# readline stdin;
#$tree->reset_root_min_max_root_leaf_distance();
@@ -321,7 +335,7 @@ $tree->get_root()->print_subtree("\n");
##exit();
my $total_branch_length = subtree_branch_length($tree->get_root());
-$new_tree = $tree->copy();
+ $new_tree = $tree->copy();
my ($new_root, $da) = $new_tree->min_leaf_dist_variance_point();
#exit;
@@ -343,13 +357,12 @@ for (my $i = 0; $i < @node_list; $i++) {
my @new_node_list = $new_tree->get_root()->recursive_subtree_node_list();
my $n = $new_node_list[$i];
- my $small = 0.0;
- my $dab = ($small +(1.0 - $small)*rand())*$n->get_branch_length(); #random point on ith branch
+ my $small = 0.000001;
+ my $dab = ($small +(1.0 - 2*$small)*rand())*$n->get_branch_length(); #random point on ith branch
$new_tree->reset_root_to_point_on_branch($n, $dab);
$count_treetesta_ok += $new_tree->test_tree();
-
$count_compare_rooted1 += $comp_rooted1 = $tree->compare_rooted($new_tree); # compare_rooted should be true only for $n a child of $new_tree's root.
$count_compare_unrooted1 += $comp_unrooted1 = $tree->compare_unrooted($new_tree); # compare_unrooted should be true
@@ -377,23 +390,158 @@ for (my $i = 0; $i < @node_list; $i++) {
# exit;
}
- $blc = abs($total_branch_length - subtree_branch_length($new_tree->get_root()));
+ my $subtree_bl = subtree_branch_length($new_tree->get_root());
+ $blc = abs($total_branch_length - $subtree_bl);
if ($blc > $max_branch_length_change) {
$max_branch_length_change = $blc;
+# print STDERR "tbl, stbl: $total_branch_length, $subtree_bl \n";
}
}
-ok($max_branch_length_change < 5.0e-15*$total_branch_length, "Test that resetting root leaves total branch length unchanged. \n");
-print($count_compare_rooted1, " ", $count_compare_unrooted1, " ", $count_compare_rooted2, " ", $count_compare_unrooted2, "\n");
-is($count_treetesta_ok, @node_list, "tree_test ok on trees rooted at random points.\n");
-is($count_treetestb_ok, @node_list, "tree_test ok on trees rooted at min variance point.\n");
-is($count_compare_rooted1, scalar $tree->get_root()->get_children(), "tree reset_root and compare test 1\n");
-is($count_compare_unrooted1, @node_list, "tree reset_root and compare test 2\n");
-is($count_compare_rooted2, @node_list, "tree reset_root and compare test 3\n");
-is($count_compare_unrooted2, @node_list, "tree reset_root and compare test 4\n");
+ok($max_branch_length_change < 5.0e-15*$total_branch_length, "Test that resetting root leaves total branch length unchanged.");
+# print($count_compare_rooted1, " ", $count_compare_unrooted1, " ", $count_compare_rooted2, " ", $count_compare_unrooted2, ".\n");
+is($count_treetesta_ok, @node_list, "tree_test ok on trees rooted at random points.");
+is($count_treetestb_ok, @node_list, "tree_test ok on trees rooted at min variance point.");
+is($count_compare_rooted1, scalar $tree->get_root()->get_children(), "tree reset_root and compare test 1.");
+is($count_compare_unrooted1, @node_list, "tree reset_root and compare test 2.");
+is($count_compare_rooted2, @node_list, "tree reset_root and compare test 3.");
+is($count_compare_unrooted2, @node_list, "tree reset_root and compare test 4.");
+
+# Test pre- in- post- order traversals.
+my $t_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1):1, C:1):1, D:1):1, E:1)"))->parse();
+
+my $preorder_names_by_hand = "node: .\n" . "node: \n" . "node: \n" . "node: \n" . "node: A\n" . "node: B\n"
+ . "node: C\n" . "node: D\n" . "node: E\n";
+#print STDERR "$preorder_names_by_hand\n";
+$t_tree->{node_names} = undef;
+$t_tree->preorder_traversal( \&traverse_test_function );
+my $preorder_names = $t_tree->{'node_names'};
+is($preorder_names, $preorder_names_by_hand, "preorder traversal test.");
+
+
+my $inorder_names_by_hand = "node: A\n" . "node: \n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n"
+ . "node: D\n" . "node: .\n" . "node: E\n";
+#print STDERR "$inorder_names_by_hand\n";
+$t_tree->{node_names} = undef;
+$t_tree->inorder_traversal( \&traverse_test_function );
+my $inorder_names = $t_tree->{'node_names'};
+is($inorder_names, $inorder_names_by_hand, "inorder traversal test.");
+
+
+$t_tree->{node_names} = undef;
+my $postorder_names_by_hand = "node: A\n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n" . "node: D\n"
+ . "node: \n" . "node: E\n" . "node: .\n";
+#print STDERR "$postorder_names_by_hand \n";
+$t_tree->postorder_traversal( \&traverse_test_function );
+# print STDERR "after. \n";
+ #sub{ my $node = shift; my $str = "node: " . $node->get_name() . "\n"; print STDERR $node->get_name(), "\n"; return $str;} );
+my $postorder_names = $t_tree->{'node_names'};
+is($postorder_names, $postorder_names_by_hand, "postorder traversal test.");
+
+
+# Test the species bit hash using a bigger tree
+my $species_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella[species=Selaginella_moellendorffii]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( (wheat[species=Triticum_aestivum]:1, wheat_x[species=Triticum_aestivum_x]:1):1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)";
+$species_tree = CXGN::Phylo::Parse_newick -> new($species_tree_newick_expression)->parse();
+
+my $gene_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella[species=Selaginella_moellendorffii]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)";
+my $gene_tree = CXGN::Phylo::Parse_newick -> new($gene_tree_newick_expression)->parse();
+
+$gene_tree->show_newick_attribute('species');
+my $nwck = $gene_tree->generate_newick(); #print $nwck, "\n";
+
+my $spec_bithash = $gene_tree->get_species_bithash($species_tree);
+my $spec_bithash_got = '';
+ foreach (sort keys %$spec_bithash){
+ $spec_bithash_got .= $_ . " " . $spec_bithash->{$_} . " \n";
+}
+#print $spec_bithash_got, "\n";
+
+my $spec_bithash_expected =
+"Amborella_trichopoda 1 \n" .
+"Brachypodium_distachyon 2 \n" .
+"Chlamydomonas_reinhardtii 4 \n" .
+"Hordeum_vulgare 8 \n" .
+"Oryza_sativa 16 \n" .
+"Phoenix_dactylifera 32 \n" .
+"Physcomitrella_patens 64 \n" .
+"Pinus_taeda 128 \n" .
+"Selaginella_moellendorffii 256 \n" .
+"Setaria_italica 512 \n" .
+"Sorghum_bicolor 1024 \n" .
+"Triticum_aestivum 2048 \n" .
+#"Triticum_aestivum_x 4096 \n" .
+"Zea_mays 4096 \n";
+
+is($spec_bithash_got, $spec_bithash_expected, "Species bithash test 1.");
+
+
+$gene_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella_x[species=Selaginella_moellendorffii_x]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)";
+$gene_tree = CXGN::Phylo::Parse_newick -> new($gene_tree_newick_expression)->parse();
+
+$gene_tree->show_newick_attribute('species');
+$nwck = $gene_tree->generate_newick(); #print $nwck, "\n";
+
+$spec_bithash = $gene_tree->get_species_bithash($species_tree);
+$spec_bithash_got = '';
+ foreach (sort keys %$spec_bithash){
+ $spec_bithash_got .= $_ . " " . $spec_bithash->{$_} . " \n";
+}
+#print $spec_bithash_got, "\n";
+
+$spec_bithash_expected =
+"Amborella_trichopoda 1 \n" .
+"Brachypodium_distachyon 2 \n" .
+"Chlamydomonas_reinhardtii 4 \n" .
+"Hordeum_vulgare 8 \n" .
+"Oryza_sativa 16 \n" .
+"Phoenix_dactylifera 32 \n" .
+"Physcomitrella_patens 64 \n" .
+"Pinus_taeda 128 \n" .
+#"Selaginella_moellendorffii 256 \n" .
+"Setaria_italica 256 \n" .
+"Sorghum_bicolor 512 \n" .
+"Triticum_aestivum 1024 \n" .
+#"Triticum_aestivum_x 2048 \n" .
+"Zea_mays 2048 \n";
+
+is($spec_bithash_got, $spec_bithash_expected, "Species bithash test 2.");
+
+
+$gene_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella_x[species=Selaginella_moellendorffii_x]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm_x[species=Phoenix_dactylifera_x]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium_x[species=Brachypodium_distachyon_x]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)";
+$gene_tree = CXGN::Phylo::Parse_newick -> new($gene_tree_newick_expression)->parse();
+
+$gene_tree->show_newick_attribute('species');
+$nwck = $gene_tree->generate_newick(); #print $nwck, "\n";
+
+$spec_bithash = $gene_tree->get_species_bithash($species_tree);
+$spec_bithash_got = '';
+ foreach (sort keys %$spec_bithash){
+ $spec_bithash_got .= $_ . " " . $spec_bithash->{$_} . " \n";
+}
+#print $spec_bithash_got, "\n";
+
+$spec_bithash_expected =
+"Amborella_trichopoda 1 \n" .
+#"Brachypodium_distachyon 2 \n" .
+"Chlamydomonas_reinhardtii 2 \n" .
+"Hordeum_vulgare 4 \n" .
+"Oryza_sativa 8 \n" .
+#"Phoenix_dactylifera 32 \n" .
+"Physcomitrella_patens 16 \n" .
+"Pinus_taeda 32 \n" .
+#"Selaginella_moellendorffii 256 \n" .
+"Setaria_italica 64 \n" .
+"Sorghum_bicolor 128 \n" .
+"Triticum_aestivum 256 \n" .
+#"Triticum_aestivum_x 2048 \n" .
+"Zea_mays 512 \n";
+
+is($spec_bithash_got, $spec_bithash_expected, "Species bithash test 3.");
+
+exit;
sub subtree_branch_length{
- my $self = shift; # node
+ my $self = shift; # node
my @node_list = $self->recursive_subtree_node_list();
my $total_branch_length = 0.0;
foreach my $n (@node_list) {
@@ -403,3 +551,14 @@ sub subtree_branch_length{
}
+sub traverse_test_function{
+my $node = shift;
+my $tree = $node->get_tree();
+#my $new_node_names = (defined $tree->{'node_names'})? $tree->{'node_names'}: '';
+#$new_node_names .= "node: [" . $node->get_name() . "]\n"; print STDERR "node: [", $node->get_name(), "]\n";
+#$tree->{'node_names'} = $new_node_names;
+$tree->{'node_names'} .= "node: " . $node->get_name() . "\n";
+}
+
+
+
diff --git a/t/CXGN/Phylo/phylo_test.t b/t/CXGN/Phylo/phylo_test.t
deleted file mode 100755
index b270f376..00000000
--- a/t/CXGN/Phylo/phylo_test.t
+++ /dev/null
@@ -1,452 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use Test::More qw/no_plan/;
-
-use CXGN::Phylo::Tree;
-use CXGN::Phylo::Node;
-use CXGN::Phylo::Parser;
-use Data::Dumper;
-
-use Carp;
-
-# expression to test the Phylo packages with
-#
-my $newick_expression =
- "(1:0.082376,(2:0.196674,((3:0.038209,6:0.354293):0.026742,5:0.094338):0.064142):0.067562,4:0.295612)";
-my $parser = CXGN::Phylo::Parse_newick -> new($newick_expression);
-
-# test tokenizer
-#
-my @tokens = $parser -> tokenize($newick_expression);
-print STDERR "\tTOKENS: ".join("|", @tokens)."\n";
-is (@tokens, 22, "Token count test");
-
-my $tree = $parser-> parse();
-
-#print STDERR Dumper($tree);
-
-#print STDERR "Total Nodes: ".(keys(%{$tree->{node_hash}}))."\n";
-#is (keys(%{$tree->{node_hash}}), 10, "node count test");
-
-# check the number of nodes returned by get_all_nodes
-#
-is ($tree->get_all_nodes(), 10, "node count test [node_hash]");
-
-# pick an element and verify if it is a CXGN::Phylo::Node object
-#
-is ( UNIVERSAL::isa(($tree->get_all_nodes())[4], "CXGN::Phylo::Node"), 1, "node id test");
-
-my $n = $tree->get_node(5);
-#print STDERR "NODE 5: ".$n->get_name()."\n";
-#$n->set_hilited(1);
-#print STDERR "Set node 5 to hilited ".$n->get_hilited()."\n";
-#$n->rotate_node();
-#$tree->get_root()->rotate_node();
-#$n4->set_hidden(1);
-
-#my $subtree_len = $tree->get_root()->calculate_subtree_distances();
-#is ($subtree_len, 12, "subtree length test");
-
-# test the leaf functions in two different ways
-#
-is ($tree->get_leaf_count(), 6, "leaf count test");
-
-my @leaflist = $tree->get_leaf_list();
-# foreach my $leaf (@leaflist) {
-# print STDERR "Leaf: ".$leaf->get_name()."\n";
-# }
-
-is ($tree->get_root()->is_leaf, 0, "root leaf test");
-
-# foreach my $l (@leaflist) { print STDERR "LEAFLIST: ". ($l->get_name())."\n"; }
-
-is (@leaflist, 6, "leaf list test");
-
-#$tree->preorder_traversal(sub {my $node = shift; print STDERR "Preorder. node name: ", $node->get_name(), " ", $node->get_branch_length(), "\n";});
-#print STDERR "\n";
-
-#$tree->postorder_traversal(sub {my $node = shift; print STDERR "Postorder. node name: ", $node->get_name(), " ", $node->get_branch_length(), "\n";});
-
-#exit;
-
-# test the root
-#
-my $root = $tree->get_root();
-is ($root->is_root(), 1, "root test");
-
-# test the subtree node count functions
-#
-#my @ortho_groups = $tree->get_orthologs();
-$tree->get_root()->calculate_subtree_node_count();
-my $root_subnode_count = $tree->get_root()->get_subtree_node_count();
-my $leaf_subnode_count = $tree->get_node(7)->get_subtree_node_count();
-my $inner_node_subnode_count = $tree->get_node(3)->get_subtree_node_count();
-is ($root_subnode_count, 9, "root subnode count test");
-is ($leaf_subnode_count, 0, "leaf subnode count test");
-is ($inner_node_subnode_count, 6, "inner node subnode count test");
-
-# set species information to test subtree_species_count stuff
-#
-my @species_list = ("coffee", "tomato", "potato", "pepper", "eggplant", "brachypodium");
-my @node_list = values %{$tree->{node_hash}};
-my $i = 0;
-foreach my $n (@node_list) {
- next if(scalar $n->get_children() > 0); # skip non-leaves
- $n->set_species($species_list[$i % 6]);
- print "i, species: $i ", $n->get_species(), "\n";
- $i++;
-}
-
-# test the subtree_species count functions
-
-#$tree->get_root()->recursive_text_render();
-#exit;#
-$tree->get_root()->recursive_set_leaf_species_count();
-#$tree->get_root()->calculate_subtree_species_count();
-
-# pick out a node and test the count
-#
-print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n";
-is($tree->get_root()->get_attribute("leaf_species_count"), 6, "subtree leaf species count test");
-#is ($tree->get_node(5)->get_attribute("leaf_species_count"), 3, "subtree leaf species count test");
-
-# test the remove_child function
-#
-print STDERR 'before $tree->copy() ', "\n";
-my $rm_tree = $tree->copy();
-print STDERR 'after $tree->copy() \n';
-my @root_children = $rm_tree->get_root()->get_children();
-my $n = $root_children[1];
-my @children =$n->get_children();
-print STDERR "\tRemove child\nbefore: ".$n->to_string()."\n";
-is ($n->get_children, 2, "get_children test");
-print STDERR "\t(Removing child ".$children[0]->get_node_key().")\n";
-$n->remove_child($children[0]);
-is ($n->get_children(), 1, "remove child test");
-print STDERR "\tafter : ".$n->to_string()."\n";
-
-my @root_kids = $rm_tree->get_root()->get_children();
-is (@root_kids, 3, "root children count test");
-$rm_tree->get_root()->remove_child($root_kids[1]);
-print STDERR "\tRemoving child key=".($root_kids[1]->get_name())."\n";
-#foreach my $c ($rm_tree->get_root()->get_children()) { print "current children = ".$c->get_name()."\n"; }
-is ($rm_tree->get_root()->get_children(), 2, "removed one root child test");
-
-# test reset_root
-#
-
-#my $nn = $tree->get_node(5);
-#$tree->reset_root($nn);
-
-
-
-
-# test the compare function
-# initialize two identical trees and compare
-# (should return 1)
-#
-my $species_tree_newick = "((((( tomato_tomato:1, potato_potato:1):1, pepper_pepper:1 ):1, eggplant_eggplant):1, nicotiana_nicotiana:1):1, coffee_coffee:1)";
-my $species_tree_parser = CXGN::Phylo::Parse_newick->new($species_tree_newick);
-my $species_tree = $species_tree_parser->parse();
-
-my $species_tree_newick2 = "((((( tomato_tomato:5, potato_potato:1):1, pepper_pepper:1 ):1, eggplant_eggplant):1, nicotiana_nicotiana:1):1, coffee_coffee:1)";
-my $species_tree_parser2 = CXGN::Phylo::Parse_newick->new($species_tree_newick2);
-my $species_tree2 = $species_tree_parser2->parse();
-
-# compare the tree to itself
-#
-is ($species_tree->compare_rooted($species_tree), 1, "tree self comparison test (tree1-tree1)");
-is ($species_tree->compare_rooted($species_tree2), 1, "tree comparison test (tree1-tree2)");
-is ($species_tree2->compare_rooted($species_tree2), 1, "tree self comparison test (tree2-tree2)");
-is ($species_tree2->compare_rooted($species_tree), 1, "tree comparion test (tree2-tree1)");
-
-# test that a different tree returns 0
-#
-is($species_tree->compare_rooted($tree), 0, "tree inequality test");
-
-# test that two topologically identical but specified differently match in the comparison
-#
-my $tree_a = CXGN::Phylo::Parse_newick->new("(A:1, B:1)")->parse();
-my $tree_b = CXGN::Phylo::Parse_newick->new("(B:1, A:1)")->parse();
-is ($tree_a->compare_rooted($tree_b), 1, "tree topology specification test");
-
-
-# test the copy function
-#
-my $new_tree = $tree->copy();
-if ($tree->compare_rooted($new_tree)) { # should be the same, shouldn't it?
- print STDERR "Compared tree to newtree and found them to be identical.\n";
-}
-else { print STDERR "newtree and tree are not identical. Oops.\n"; }
-is ($new_tree->compare_rooted($tree), 1, "copied tree identity check");
-isnt ( $new_tree, $tree, "tree pointer non-identity check");
-
-my ($rfd, $symd, $d3) = $tree->RF_distance($new_tree);
-is($rfd, 0, "check RF distance between tree and copy is 0.\n");
-is($symd, 0, "check RF distance between tree and copy is 0.\n");
-
-# check if I can remove a node in new_tree without affecting $tree
-#
-#print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n";
-$new_tree->delete_node(3);
-
-
-# print $tree->generate_newick(), "\n";
-# print $new_tree->generate_newick(), "\n";
-
-is($new_tree->compare_rooted($tree), 0, "changed copied tree identity check");
-
-# test the collapsing function - test a tree with many nodes that
-# have only one child.
-#
-print STDERR "\tTesting CXGN::Phylo::Node::recursive_collapse_nodes\n";
-my $c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1)"))->parse();
-
-$c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree));
-
-#print STDERR "The original tree: \n";
-#$c_tree->render();
-#print STDERR "=====\n\n";
-
-is ($c_tree->get_all_nodes(), 6, "node count before collapse");
-
-$c_tree->collapse_tree();
-#print STDERR "The collapsed tree:\n";
-#$c_tree->render();
-#print STDERR "=====\n\n";
-
-is ($c_tree->get_all_nodes(), 3, "node count after collapse");
-
-#$new_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($new_tree));
-#$new_tree->render();
-
-#if(1 || $c_tree->get_all_nodes() != 3){
-#$c_tree->print_node_keys();
-#$c_tree->get_root()->print_subtree();
-#}exit;
-
-# test a more complex case for collapsing
-#
-$c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse();
-$c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree));
-$c_tree->collapse_tree();
-
-# test a tree collapsing with a tree that has branch lengths of zero.
-#
-my $z_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:0)C:0)D:0)E:1, (((G:1, F:1)H:0)I:1)J:1)"))->parse();
-print STDERR "Testing the recursive_collapse_zero_branches() function...\nOriginal tree:\n";
-$z_tree->get_root()->print_subtree();
-
-my $z_tree_node_count = $z_tree->get_node_count();
-$z_tree ->get_root()->recursive_collapse_zero_branches();
-
-is ($z_tree->get_node_count(), $z_tree_node_count-4, "recursive_collapse_zero_nodes test");
-
-# check the delete node function
-# first, check if we can delete an internal node...
-#
-print STDERR "\tDeleting internal node (key=4)...\n";
-my $ind_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse();
-my $ind_tree_copy = $ind_tree->copy();
-$ind_tree->delete_node(4);
-is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+1), "node count after delete test");
-is ($ind_tree->get_node(4), undef, "has node really disappeared test");
-
-# let's delete a leaf node...
-#
-print STDERR "\tDeleting a leaf node (key=2)...\n";
-$ind_tree->delete_node(2);
-#$ind_tree->render();
-is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+2), "node count after leaf node deletion");
-
-# test the newick generation from the node
-#
-my $original_newick = "((((A:1,B:1)C:1)D:1)E:1,(((G:1,F:1)H:1)I:1)J:1)";
-my $t = (CXGN::Phylo::Parse_newick->new($original_newick))->parse();
-my $new = $t->get_root()->recursive_generate_newick();
-# print STDERR "Original: $original_newick\n";
-# print STDERR "Regenerated newick = $new\n";
-my $t2 = (CXGN::Phylo::Parse_newick->new($new))->parse();
-is($t->compare_rooted($t2), 1, "Newick regeneration from tree test");
-
-
-my $incorp_tree = $tree->copy();
-$incorp_tree->incorporate_nodes(CXGN::Phylo::Node->new());
-is($incorp_tree->get_all_nodes(), 11, "Incorporate Node Test");
-
-my $b_tree = $tree->copy();
-is($b_tree->get_all_nodes(), 10, "Binary tree test: copy");
-$b_tree->make_binary();
-my @nodes = $b_tree->get_all_nodes();
-is(@nodes, 11, "Binary tree test: node count");
-my $binary_fail = 0;
-foreach(@nodes){
- my @children = $_->get_children();
- $binary_fail = 1 if @children > 2;
-}
-isnt($binary_fail, 1, "Binary tree test: all children count <= 2");
-
-# render the tree
-#
-# $tree->get_layout()->set_image_width(500);
-# $tree->get_layout()->set_image_height(300);
-
-# my $PNG_tree_renderer = CXGN::Phylo::PNG_tree_renderer -> new($tree);
-# $tree->get_layout()->set_left_margin(50);
-# $tree->get_layout()->set_right_margin(40);
-# $PNG_tree_renderer->render();
-
-# my $renderer = CXGN::Phylo::Text_tree_renderer -> new($tree);
-# $renderer->render();
-
-
-# test tree root resetting, and tree comparison
-#
-# Get tree from newick expression. Reset root so as to minimize maximum root-leaf distance.
-# copy a tree, then, for each branch, reset the root to a point along the branch,
-# compare to original tree in both rooted and unrooted senses,
-# unrooted comparison should give 1, rooted 0, except for branches to orig. root.
-# Then reset root again so as to minimize max distance to leaves from root, and should
-# recover original tree. Check that rooted and unrooted compares both give 1.
-
-# $tree = CXGN::Phylo::Parse_newick->new("(A:1, (B:1, C:1):1)")->parse();
-my $newick_expression = "(A:0.082376,(B:0.196674,((C:0.038209,F:0.354293):0.026742,E:0.094338):0.064142):0.067562,D:0.295612)";
-#my $newick_expression = "(A:1,(B:1,((C:2,F:4):1,E:1):2.02):1,D:2)";
-#my $newick_expression = "((A:1, D:2):1, (B:1, C:2, E:3):2)";
-#my $newick_expression = "((A:0.89, D:1.2):1.4, (B:1, C:1.1, E:0.9):1)";
-#my $newick_expression = "(C:1, D:3, (A:5, B:2): 1)";
-#my $newick_expression = "(A:3, ((B:1, C:2):1.5):1)";
-$tree = CXGN::Phylo::Parse_newick->new($newick_expression)->parse();
-ok($tree->test_tree(), "tree test 1");
-$tree->get_root()->recursive_collapse_single_nodes();
-ok($tree->test_tree(), "tree test 2");
-
-
-#my ($mldv_node, $mldv_dist_above, $min_var) = $tree->min_leaf_dist_variance_point();
-#$tree->reset_root_to_point_on_branch($mldv_node, $mldv_dist_above);
-
-$tree->reset_root_to_point_on_branch($tree->min_leaf_dist_variance_point());
-# print("tree initially rerooted at min variance point, (i.e. before loop): \n");
-
-
-$tree->get_root()->recursive_implicit_names();
-# $tree->get_root()->print_subtree("\n");
-# readline stdin;
-#$tree->reset_root_min_max_root_leaf_distance();
-
-#my ($anode, $adist, $avar) = $tree->min_leaf_dist_variance_point();
-#print("opt node name, dist above, variance, stddev: ", $anode->get_name(), " ", $adist, " ", $avar, " ", sqrt($avar), "\n");
-##exit();
-
-my $total_branch_length = subtree_branch_length($tree->get_root());
-my $new_tree = $tree->copy();
-my ($new_root, $da) = $new_tree->min_leaf_dist_variance_point();
-#exit;
-
-my $count_compare_rooted1 = 0;
-my $count_compare_unrooted1 = 0;
-my $count_compare_rooted2 = 0;
-my $count_compare_unrooted2 = 0;
-my $count_treetesta_ok = 0;
-my $count_treetestb_ok = 0;
-my @node_list = $tree->get_root()->recursive_subtree_node_list();
-
-my $max_branch_length_change = -1.0;
-my $blc;
-my ($comp_rooted1, $comp_unrooted1, $comp_rooted2, $comp_unrooted2) = (-1, -1, -1, -1);
-
-srand(132456);
-for (my $i = 0; $i < @node_list; $i++) {
- my $new_tree = $tree->copy();
-
- my @new_node_list = $new_tree->get_root()->recursive_subtree_node_list();
- my $n = $new_node_list[$i];
- my $small = 0.0;
- my $dab = ($small +(1.0 - $small)*rand())*$n->get_branch_length(); #random point on ith branch
-
- $new_tree->reset_root_to_point_on_branch($n, $dab);
-my $testres = $new_tree->test_tree();
-# print "testres: $testres \n";
- $count_treetesta_ok += $testres;
-# print "count_treetesta_ok: $count_treetesta_ok \n";
-
- $count_compare_rooted1 += $comp_rooted1 = $tree->compare_rooted($new_tree); # compare_rooted should be true only for $n a child of $new_tree's root.
- $count_compare_unrooted1 += $comp_unrooted1 = $tree->compare_unrooted($new_tree); # compare_unrooted should be true
-
-#put in some RF distance tests here.
-
- $blc = abs($total_branch_length - subtree_branch_length($new_tree->get_root()));
- if ($blc > $max_branch_length_change) {
- $max_branch_length_change = $blc;
- }
-
- # my ($new_root, $da, $var) = $new_tree->min_leaf_dist_variance_point();
- # $new_tree->reset_root_to_point_on_branch($new_root, $da);
-
- $new_tree->reset_root_to_point_on_branch($new_tree->min_leaf_dist_variance_point());
- $count_treetestb_ok += $new_tree->test_tree();
-
- $count_compare_rooted2 += $comp_rooted2 = $tree->compare_rooted($new_tree);
- $count_compare_unrooted2 += $comp_unrooted2 = $tree->compare_unrooted($new_tree);
-
- if (!$comp_rooted2 || !$comp_unrooted2) {
- print("tree : \n");
- $tree->get_root()->print_subtree("\n");
- print("new_tree: \n");
- $new_tree->get_root()->print_subtree("\n");
- # exit;
- }
-
- my $subtree_bl = subtree_branch_length($new_tree->get_root());
- $blc = abs($total_branch_length - $subtree_bl);
- if ($blc > $max_branch_length_change) {
- $max_branch_length_change = $blc;
- print STDERR "tbl, stbl: $total_branch_length, $subtree_bl \n";
- }
-}
-ok($max_branch_length_change < 5.0e-15*$total_branch_length, "Test that resetting root leaves total branch length unchanged. \n");
-print($count_compare_rooted1, " ", $count_compare_unrooted1, " ", $count_compare_rooted2, " ", $count_compare_unrooted2, "\n");
-is($count_treetesta_ok, @node_list, "tree_test ok on trees rooted at random points.\n");
-is($count_treetestb_ok, @node_list, "tree_test ok on trees rooted at min variance point.\n");
-is($count_compare_rooted1, scalar $tree->get_root()->get_children(), "tree reset_root and compare test 1\n");
-is($count_compare_unrooted1, @node_list, "tree reset_root and compare test 2\n");
-is($count_compare_rooted2, @node_list, "tree reset_root and compare test 3\n");
-is($count_compare_unrooted2, @node_list, "tree reset_root and compare test 4\n");
-
-# Test pre- in- post- order traversals.
-my $t_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1):1, C:1):1, D:1):1, E:1)"))->parse();
-my $preorder_names_by_hand = "node: .\n" . "node: \n" . "node: \n" . "node: \n" . "node: A\n" . "node: B\n"
- . "node: C\n" . "node: D\n" . "node: E\n";
-#our $preorder_names = "";
-my $preorder_names = $t_tree->preorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str;} );
-is($preorder_names, $preorder_names_by_hand, "preorder traversal test.\n");
-#print STDERR "preorder_names: \n", $preorder_names, "\n\n", $preorder_names_by_hand. "\n\n";
-
-my $inorder_names_by_hand = "node: A\n" . "node: \n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n"
- . "node: D\n" . "node: .\n" . "node: E\n";
-my $inorder_names = $t_tree->inorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str; } );
-is($inorder_names, $inorder_names_by_hand, "inorder traversal test.\n");
-#print STDERR "inorder_names: \n", $inorder_names, "\n\n", $inorder_names_by_hand. "\n\n";
-
-my $postorder_names_by_hand = "node: A\n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n" . "node: D\n"
- . "node: \n" . "node: E\n" . "node: .\n";
-my $postorder_names = $t_tree->postorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str; } );
-is($postorder_names, $postorder_names_by_hand, "postorder traversal test.\n");
-#print STDERR "postorder_names: \n", $postorder_names, "\n\n", $postorder_names_by_hand. "\n\n";
-
-
-sub subtree_branch_length{
- my $self = shift; # node
- my @node_list = $self->recursive_subtree_node_list();
- my $total_branch_length = 0.0;
- foreach my $n (@node_list) {
- $total_branch_length += $n->get_branch_length();
- }
- return $total_branch_length;
-}
-
-
-sub traverse_test_function{
- my $node = shift;
- my $str = "node: " . $node->get_name() . "\n"; return $str;
-}