diff --git a/lib/CXGN/Phylo/IdTaxonMap.pm b/lib/CXGN/Phylo/IdTaxonMap.pm new file mode 100644 index 00000000..0c6cc2df --- /dev/null +++ b/lib/CXGN/Phylo/IdTaxonMap.pm @@ -0,0 +1,70 @@ +package CXGN::Phylo::IdTaxonMap; + +use strict; +use List::Util qw ( min max sum ); + +my %default_map = + ( + '^AT' => 'arabidopsis', + '^Bradi' => 'brachypodium', + '^(?:X_)?\d{5}[.]m\d{6}' => 'castorbean', # can have X_ prefix or not + '^GSVIV' => 'grape', + '^(?:GRMZM|AC\d{6})' => 'maize', + '^IMGA[|_](?:Medtr|AC|CU)' => 'medicago', + '^evm' => 'papaya', + '^POPTR' => 'poplar', + '^LOC_Os' => 'rice', + '^jgi_Selmo' => 'selaginella', + '^Sb' => 'sorghum', + '^Glyma' => 'soybean', + '^Solyc' => 'tomato' + ); + + +sub new { + my $class = shift; + my $arg = shift; # hash ref, keys are regular expressions, values are corresponding taxon names. + my $args= {}; + my $self = bless $args, $class; + + my %map = (); + foreach (keys %default_map) { + $map{$_} = $default_map{$_}; + } + + foreach (keys %$arg) { + $map{$_} = $arg->{$_}; + } + + $self->{map} = \%map; + return $self; +} + +sub get_map{ + my $self = shift; + return $self->{map}; +} + + +sub add_idregex_taxonname{ + my $self = shift; + my $idregex = shift; + my $taxonname = shift; + $self->{map}->{$idregex} = $taxonname; +} + + +sub id_to_taxonname{ + my $self = shift; + my $id = shift; + + my $map = $self->get_map(); + foreach (keys %$map) { +# print "id regex: $_\n"; + return $map->{$_} if($id =~ /$_/); + } + warn "No taxon name found for id: $id\n"; + return; +} + +1; diff --git a/lib/CXGN/Phylo/Layout.pm b/lib/CXGN/Phylo/Layout.pm index 76ae4ae0..be508be6 100644 --- a/lib/CXGN/Phylo/Layout.pm +++ b/lib/CXGN/Phylo/Layout.pm @@ -357,8 +357,7 @@ sub _layout_vertical { my $total_leaves = $self->get_tree()->get_leaf_count(); my $image_height = $self->get_image_height(); if ($total_leaves < 1) { return; } - my $vertical_gap = ($image_height - $self->get_top_margin() - $self->get_bottom_margin()) / ($total_leaves); - + my $vertical_gap = ($total_leaves > 1)?($image_height - $self->get_top_margin() - $self->get_bottom_margin()) / ($total_leaves-1): 0; # the leaf nodes should be easy. # traverse the tree, find all the leaves, and set the vertical # coordinate in the leaves to an increasing value with step $vertical_gap. diff --git a/lib/CXGN/Phylo/Mrbayes.pm b/lib/CXGN/Phylo/Mrbayes.pm new file mode 100644 index 00000000..a98ac963 --- /dev/null +++ b/lib/CXGN/Phylo/Mrbayes.pm @@ -0,0 +1,671 @@ +package Mrbayes; +use strict; +use List::Util qw ( min max sum ); +#use TlyUtil qw ( Kolmogorov_Smirnov_D ); + +# this is an object to facilitate running MrBayes (Bayesian phylogeny +# inference program). The main functionality it adds is a relatively +# easy way to control the criteria for deciding when to end a run. + +my $default_chunk_size = 2000; + +sub new { + my $class = shift; + my $arg = shift; # a hashref for setting various options + my $default_arguments = {'alignment_nex_filename' => undef, + 'file_basename' => undef, + 'seed' => undef, + 'swapseed' => undef, + 'n_runs' => 2, + 'n_temperatures' => 4, + 'temperature_gap' => 0.25, + 'chunk_size' => $default_chunk_size, + 'print_freq' => undef, + 'sample_freq' => 20, + 'burnin_frac' => 0.1, + 'diag_freq' => undef, + 'converged_chunks_required' => 10, + + 'fixed_pinvar' => undef, # undef -> leaves default of uniform(0,1) in effect + # convergence criteria + 'splits_min_hits' => 25, + 'splits_max_ok_stddev' => 0.03, + 'splits_max_ok_avg_stddev' => 0.01, + 'modelparam_min_ok_ESS' => 250, + 'modelparam_max_ok_PSRF' => 1.02, + 'modelparam_max_ok_KSD' => 0.2, + 'ngens_run' => 0, + 'id_species_map' => {} + }; + my $self = bless $default_arguments, $class; + + foreach my $option (keys %$arg) { + warn "Unknown option: $option in Mrbayes constructor.\n" if(!exists $self->{$option}); + $self->{$option} = $arg->{$option}; + } + $self->{print_freq} = $self->{chunk_size} if(!defined $self->{print_freq}); + $self->{diag_freq} = $self->{chunk_size} if(!defined $self->{diag_freq}); + # print "print, diag freq: ", $self->{print_freq}, " ", $self->{diag_freq}, "\n"; + + my $alignment_nex_filename = $self->{alignment_nex_filename}; + # if(defined $self->{file_basename}){ + # my $file_basename; + if (!defined $self->{file_basename}) { + my $file_basename = $alignment_nex_filename; + $file_basename =~ s/[.]nex$//; # delete .nex ending + $self->{file_basename} = $file_basename; + } + my $n_runs = $self->{n_runs}; + my $burnin_frac = $self->{burnin_frac}; + my $n_temperatures = $self->{n_temperatures}; + my $temperature_gap = $self->{temperature_gap}; + my $sample_freq = $self->{sample_freq}; + my $print_freq = $self->{print_freq}; + my $chunk_size = $self->{chunk_size}; + my $fixed_pinvar = $self->{fixed_pinvar}; + my $prset_pinvarpr = (defined $fixed_pinvar)? "prset pinvarpr=fixed($fixed_pinvar);\n" : ''; + + my $begin_piece = + "begin mrbayes;\n" . + "set autoclose=yes nowarn=yes;\n"; + my $seed_piece = ''; + if (defined $self->{seed}) { + my $seed = $self->{seed}; $seed_piece .= "set seed=$seed;\n"; + } + if (defined $self->{swapseed}) { + my $swapseed = $self->{swapseed}; $seed_piece .= "set swapseed=$swapseed;\n"; + } + my $middle_piece = "execute $alignment_nex_filename;\n" . + "set precision=6;\n" . + "lset rates=invgamma;\n" . + "prset aamodelpr=fixed(wag);\n" . + "$prset_pinvarpr" . + # "prset pinvarpr=fixed(0.15);\n" . + "mcmcp minpartfreq=0.02;\n" . # bipartitions with freq. less than this are not used in the diagnostics (default is 0.10) + "mcmcp allchains=yes;\n" . + "mcmcp burninfrac=$burnin_frac;\n" . + "mcmcp nchains=$n_temperatures;\n" . + "mcmcp nruns=$n_runs;\n" . + "mcmcp temp=$temperature_gap;\n" . + "mcmcp samplefreq=$sample_freq;\n" . + "mcmcp printfreq=$print_freq;\n" . + # "mcmcp filename=$file_basename;\n" . + "mcmcp checkpoint=yes checkfreq=$chunk_size;\n"; + my $end_piece = "sump;\n" . "sumt;\n" . "end;\n"; + + $self->{mrbayes_block1} = + $begin_piece . $seed_piece . + $middle_piece . "mcmc ngen=$chunk_size;\n" . + $end_piece; + + $self->{mrbayes_block2} = + $begin_piece . $middle_piece . + "mcmc append=yes ngen=$chunk_size;\n" . + $end_piece; + + $self->setup_id_species_map(); + + return $self; +} + +sub run{ + my $self = shift; + + my $chunk_size = $self->{chunk_size}; + my $ngen = $chunk_size; + my $mrbayes_block1 = $self->{mrbayes_block1}; + + open my $fh, ">tmp_mrb1.nex"; + print $fh "$mrbayes_block1"; + close $fh; + + my $mb_output_string = `mb tmp_mrb1.nex`; + $self->{ngens_run} = $ngen; + + my $mc3swap_filename = $self->{file_basename} . ".mc3swap"; + open my $fhmc3, ">$mc3swap_filename"; + print $fhmc3 "$ngen ", $self->extract_swap_info($mb_output_string), "\n"; + + open $fh, ">mb1.stdout"; + print $fh "$mb_output_string \n"; + close $fh; + + my ($converged, $conv_string) = $self->test_convergence($self->{file_basename}); + my $converge_count += $converged; + open my $fhc, ">MB.converge"; + print $fhc "$ngen $converge_count $conv_string\n"; + + foreach (my $i=1; $i>0; $i++) { # infinite loop + $ngen += $chunk_size; + my $mrbayes_block2 = $self->{mrbayes_block2}; + $mrbayes_block2 =~ s/ngen=\d+;/ngen=$ngen;/; # subst in the new ngen + + open $fh, ">tmp_mrb2.nex"; + print $fh "$mrbayes_block2"; + close $fh; + + $mb_output_string = `mb tmp_mrb2.nex`; + + $self->{ngens_run} = $ngen; + + print $fhmc3 "$ngen ", $self->extract_swap_info($mb_output_string), "\n"; + open $fh, ">mb2.stdout"; + print $fh "$mb_output_string \n"; + close $fh; + + ($converged, $conv_string) = $self->test_convergence($self->{file_basename}); + $converge_count += $converged; + print $fhc "$ngen $converge_count $conv_string\n"; + last if($converge_count >= $self->{converged_chunks_required}); + } + close $fhmc3; close $fhc; + return; +} + + +sub splits_convergence{ + my $self = shift; + my $file_basename = shift; # e.g. fam9877 + my $min_hits = $self->{splits_min_hits}; # ignore splits with fewer hits than this. + my $max_ok_stddev = $self->{splits_max_ok_stddev}; # convergence is 'ok' for a split if stddev < this. + my $max_ok_avg_stddev = $self->{splits_max_ok_avg_stddev}; # convergence is 'ok' for a split if stddev < this. + #print "in splits convergence file basename: ", $file_basename, "\n"; #exit; + + my $filename = $file_basename . ".nex.tstat"; + # print "filename: $filename\n"; + open my $fh, "<$filename"; + my @lines = <$fh>; + + my ($avg_stddev, $count, $bad_count) = (0, 0, 0); + foreach (@lines) { + # print; + next unless(/^\s*\d/); # skip if first non-whitespace is not numeral. + my @cols = split(" ", $_); + my $hits = $cols[1]; my $stddev = $cols[3]; + # print "$hits, $min_hits, $stddev\n"; + last if($hits < $min_hits); + $count++; + $avg_stddev += $stddev; + if ($stddev > $max_ok_stddev) { + $bad_count++; + next; + } + } + $avg_stddev = ($count == 0)? 100 : $avg_stddev/$count; + my $splits_converged = ($bad_count == 0 and $avg_stddev < $max_ok_avg_stddev); + return ($splits_converged, $count, $bad_count, $avg_stddev); +} + + +sub modelparam_convergence{ # look at numbers in *.pstat file + # to test convergence + my $self = shift; + my $file_basename = shift; + my $min_ok_ESS = $self->{modelparam_min_ok_ESS}; + my $max_ok_PSRF = $self->{modelparam_max_ok_PSRF}; + my $max_ok_KSD = $self->{modelparam_max_ok_KSD}; + my $string = ''; + my $ngens_skip = int($self->{burnin_frac} * $self->{ngens_run}); + + open my $fh, "<$file_basename.nex.pstat"; + my @lines = <$fh>; + close $fh; + my $discard = shift @lines; + my $count_param_lines = 0; + my $KSD_datacol = 1; + my $LL_KSD = $self->KSDmax($ngens_skip, 1); + my $n_bad = ($LL_KSD <= $max_ok_KSD)? 0 : 1; + my @KSDmaxes = ($LL_KSD); + # $n_bad++ if($LL_KSD > $self->{modelparam_max_ok_KSD}); # require LogL just to pass KSD test + foreach (@lines) { + my @cols = split(" ", $_); + my ($avgESS, $PSRF) = @cols[7, 8]; # col 6 is the min ESS among the runs, col 7 is avg. + next unless($avgESS =~ /^\d*[.]?\d+/ and $PSRF =~ /^\d*[.]?\d+/); + $KSD_datacol++; # 2,3,4,... the params in pstat file are in cols 2,3,4,... in *.run?.p + my $KSDmax = $self->KSDmax($ngens_skip, $KSD_datacol); + push @KSDmaxes, $KSDmax; + $string .= "$avgESS $PSRF "; + if ($avgESS < $min_ok_ESS + or $PSRF > $max_ok_PSRF + or $KSDmax > $max_ok_KSD) { + $n_bad++; + } + } + $string .= join(" ", map sprintf("%5.3f", $_), @KSDmaxes); # join(" ", @KSDmaxes); + return ($n_bad, $string); +} + +sub test_convergence{ + my $self = shift; + my $file_basename = shift; # e.g. fam9877.nex + + my ($splits_converged, $splits_count, $splits_bad_count, $splits_avg_stddev) = + $self->splits_convergence($file_basename); + my ($modelparam_n_bad, $modelparam_string) = + $self->modelparam_convergence($file_basename); + my $ngens_skip = int($self->{burnin_frac} * $self->{ngens_run}); + + my $conv_string = "$splits_count $splits_bad_count $splits_avg_stddev " . + " $modelparam_string $modelparam_n_bad "; + + my $converged = ($splits_converged and $modelparam_n_bad == 0); + + return ($converged? 1 : 0, $conv_string); +} + + +sub extract_swap_info{ + my $self = shift; + my $mb_stdout_string = shift; + my @mb_stdout_lines = split("\n", $mb_stdout_string); + my $n_lines_to_extract = 0; + my $extract_next_n = 0; + my $n_runs = undef; + my $n_chains = undef; + my $out_string = ''; + foreach (@mb_stdout_lines) { + if (/number of chains to (\d+)/) { + $n_chains = $1; + $n_lines_to_extract = $n_chains + 4; + last if(defined $n_runs); + } elsif (/number of runs to (\d+)/) { + $n_runs = $1; + last if(defined $n_chains); + } + + } + my $run; + my %run_string = (); + foreach (@mb_stdout_lines) { + if (/Chain swap information for run (\d+)/) { + $run = $1; + $extract_next_n = $n_lines_to_extract; + } + $out_string .= "$_\n" if($extract_next_n > 0); + $extract_next_n--; + if ($extract_next_n == 0) { + $run_string{$run} = $out_string; + $out_string = ''; + last if($run == $n_runs); + } + } + $out_string = ''; + + foreach (keys %run_string) { + #print "$_ ", $run_string{$_}, "\n"; + my @lines = split("\n", $run_string{$_}); + splice @lines, 0, 4; + # print join("\n", @lines); + + my %ij_swap_pA = (); + my %ij_swap_tries = (); + foreach my $i (1..$n_chains) { + my $l = $lines[$i-1]; + $l =~ s/^\s*\d+\s+[|]\s+//; + my @xs = split(" ", $l); + my $n_ntry = $i-1; + my $n_pA = $n_chains-$i; + foreach my $j (1..$n_ntry) { + # print "swap_tries key: [$i $j]\n"; + $ij_swap_tries{"$i $j"} = shift @xs; + } + foreach (1..$n_pA) { + my $j = $_ + $i; + # print "swap_pA key: [$j $i]\n"; + $ij_swap_pA{"$j $i"} = shift @xs; + } + } # loop over chains + my %ij_swap_accepts = (); + # my @sijs = sort {$a cmp $b} keys %ij_swap_tries; + # foreach (@sijs) { + + foreach my $diff (1..$n_chains-1) { + foreach my $i (1..$n_chains-1) { + my $j = $i + $diff; + + last if($j > $n_chains); + my $key = "$j $i"; + # print "i,j: $i, $j, key: [$key] \n"; + if (exists $ij_swap_pA{$key} and exists $ij_swap_tries{$key}) { + $ij_swap_accepts{$key} = $ij_swap_tries{$key} * $ij_swap_pA{$key}; + $out_string .= int($ij_swap_accepts{$key}+0.5) . " " . $ij_swap_tries{$key} . " "; + } else { + warn "key $key present in neither ij_swap_tries nor ij_swap_pA.\n"; + } + } + $out_string .= ' '; + } + $out_string .= ' '; + } # loop over runs + return $out_string; +} + + +sub KSDmax{ # This does all pairwise comparisons between runs + # for one of the params in the *.run?.p files + # and returns the largest Kolmogorov-Smirnov D + my $self = shift; + + my $ngen_skip = shift || 0; + my $datacol = shift; # data column to use. + $datacol = 1 if(!defined $datacol); + + my $bigneg = -1e300; + + my $file_basename = $self->{alignment_nex_filename}; # e.g. fam9877 + # store data in hashes + my @val_count_hrefs = ({}, {}); # + my @counts = (0, 0); + my @files = `ls $file_basename.run?.p`; + my $runs_to_analyze = scalar @files; + warn "in KSDmax. n_runs: ", $self->{n_runs}, " *.p files found: ", $runs_to_analyze, " should agree, using min of the two.\n" if($self->{n_runs} != $runs_to_analyze); + $runs_to_analyze = min($runs_to_analyze, $self->{n_runs}); + + foreach my $irun (1..$runs_to_analyze) { + my $i = $irun - 1; + my $filename = "$file_basename.run" . $irun . ".p"; + open my $fh, "<$filename"; + while (<$fh>) { + my @cols = split(" ", $_); + # skip non-numerical stuff. + next unless($cols[0] =~ /^\d+$/); + my ($ngens, $x) = @cols[0,$datacol]; + next if($ngens < $ngen_skip); + $val_count_hrefs[$i]->{$x}++; + $counts[$i]++; + } + close $fh; + } + + # get cumulative distributions: + my @val_cumeprob_hrefs = (); + foreach my $i (0..$runs_to_analyze-1) { + push @val_cumeprob_hrefs, cumulative_prob($val_count_hrefs[$i], $counts[$i]); + } + + my @KSDs = (); # Kolmogorov-Smirnov D for each pairwise comparison between runs + foreach my $i (0..scalar @files - 2) { + foreach my $j ($i+1..scalar @files - 1) { + push @KSDs, Kolmogorov_Smirnov_D(@val_cumeprob_hrefs[$i,$j]); + } + } + return max(@KSDs); +} + + + +sub retrieve_param_samples{ + # read data from run?.p files + # store in + my $self = shift; + my $pattern = shift || $self->{alignment_nex_filename}; # e.g. fam9877.nex + my $p_files = `ls $pattern.run?.p`; + my @p_infiles = split(" ", $p_files); + my $n_runs_p = scalar @p_infiles; + + # the following has one elem for each run, and it is + # a hash ref, with generation numbers as keys, + # parameter strings (logL, TL, alpha ...) as values + my @gen_param_hashes = (); + foreach (1..$n_runs_p) { + push @gen_param_hashes, {}; + } + #my $p_run = 1; + foreach my $i_run_p (1..$n_runs_p) { + my $p_file = "$pattern.run$i_run_p.p"; + open my $fhp, "<$p_file"; + + while (my $line = <$fhp>) { + chomp $line; + next unless($line =~ /^\s*(\d+)/); + # print "$line \n"; + my @cols = split(" ", $line); + my $generations = shift @cols; + my $param_string = join(" ", @cols); + $gen_param_hashes[$i_run_p-1]->{$generations} = $param_string; + } + $i_run_p++; + } + return \@gen_param_hashes; +} +# end of reading in parameter values + +sub retrieve_topology_samples{ + my $self = shift; +my $pattern = shift || $self->{alignment_nex_filename}; # e.g. fam987?.nex + + my $t_files = `ls $pattern.run?.t`; + my @t_infiles = split(" ", $t_files); + my $n_runs = scalar @t_infiles; + my @gen_ntopo_hashes = (); + foreach (1..$n_runs) { + push @gen_ntopo_hashes, {}; + } +my %newick_number_map = (); +my %number_newick_map = (); + my $topology_count = 0; + foreach my $i_run (1..$n_runs) { + my $t_infile = "$pattern.run$i_run.t"; + open my $fh, "<$t_infile"; + + # read trees in, remove branch lengths, store in array + + while (my $line = <$fh>) { + chomp $line; + if ($line =~ s/tree gen[.](\d+) = .{4}\s+//) { + my $newick = $line; + my $generation = $1; + $newick =~ s/:[0-9e\-.]*(,|[)])/$1/g; # remove branch lengths + #print "[$newick]\n"; + $newick =~ s/^\s+//; + $newick =~ s/;\s*//; + $newick = order_newick($newick); + # print $newick, "\n"; + # exit; + if (!exists $newick_number_map{$newick}) { + $topology_count++; + $newick_number_map{$newick} = $topology_count; # 1,2,3,... + $number_newick_map{$topology_count} = $newick; + } + $gen_ntopo_hashes[$i_run-1]->{$generation} = $newick_number_map{$newick}; + } + } # now $gen_ntopo_hashes[$i_run] is hash ref with generations as keys, and topology numbers as values. + } +return (\@gen_ntopo_hashes, \%newick_number_map, \%number_newick_map); +} + + +sub retrieve_number_id_map{ + my $self = shift; + my $pattern = shift || $self->{alignment_nex_filename}; # e.g. fam9877.nex + + my $trprobs_file = "$pattern.trprobs"; + open my $fh, "<$trprobs_file"; + + my %number_id_map = (); + while (my $line = <$fh>) { + last if($line =~ /^\s*tree\s+tree_/); + if ($line =~ /^\s*(\d+)\s+(\S+)/) { + my $number = $1; + my $id = $2; + $id =~ s/[,;]$//; # delete final comma + $number_id_map{$number} = $id; + } + } + return \%number_id_map; +} + + +sub count_topologies{ +my $self = shift; +my $gen_ntopo_hrefs = shift; +my %topo_count = (); # key: topology number, value: arrayref with counts in each run +# e.g. $topo_count{13} = [3,5] means that topo 13 occurred 3 times in run1, 5 times in run 2. +my $total_trees = 0; +foreach my $i_run (1..scalar @$gen_ntopo_hrefs) { + my $gen_ntopo = $gen_ntopo_hrefs->[$i_run-1]; + my $trees_read_in = scalar keys %{$gen_ntopo}; + print "Run: $i_run. Trees read in: $trees_read_in\n"; + # store trees from array in hash, skipping burn-in + my $n_burnin = int($self->{burnin_frac} * $trees_read_in); +# print "trees read in: $trees_read_in. Post burn-in: ", $trees_read_in - $n_burnin, "\n"; + my @sorted_generations = sort {$a <=> $b} keys %{$gen_ntopo}; + foreach my $i_gen (@sorted_generations[$n_burnin..$trees_read_in-1]) { + my $topo_number = $gen_ntopo->{$i_gen}; + if (!exists $topo_count{$topo_number}) { + my @zeroes = ((0) x scalar @$gen_ntopo_hrefs); + $topo_count{$topo_number} = \@zeroes; + } + $topo_count{$topo_number}->[$i_run-1]++; + $total_trees++; + } + $i_run++; +} +return (\%topo_count, $total_trees); +} + +sub restore_ids_to_newick{ + my $self = shift; + my $newick = shift; + my $number_id_map = shift; + + foreach my $number (keys %$number_id_map) { + my $id = $number_id_map->{$number}; +$id .= '[species=' . $self->{id_species_map}->{$id} . ']'; + $newick =~ s/([(,])$number([,)])/$1$id$2/; + } + return $newick; +} + +sub setup_id_species_map{ +my $self = shift; +my $file = $self->{alignment_nex_filename}; +#$file =~ s/[.]nex/.fasta/; +open my $fh, "<$file"; +while (my $line = <$fh>){ + # print $line; + next unless($line =~ /^(\S+)\[species=(\S+)\]/); + my $id = $1; +my $species = $2; +#print $line; + $self->{id_species_map}->{$id} = $species; + print "$id $species \n"; +} +#exit; +return; +} + +my $bigneg = -1e300; + +# given a set of numbers (some of which may occur more than once), which +# are stored as keys in a hash, with the values being how many times they occur +# or more generally whatever weights you want, sort the keys and get the +# cumulative distribution. +sub cumulative_prob{ + my $val_weight_href = shift; # hashref, key: numbers, values: weights. + my $sum_weights = shift; + my $val_cumeprob_href = { $bigneg => 0}; + my $cume_prob = 0; + foreach (sort {$a <=> $b} keys %{$val_weight_href}) { + $cume_prob += $val_weight_href->{$_}/$sum_weights; + $val_cumeprob_href->{$_} = $cume_prob; + # print "$i $_ $cume_prob ", $val_count_hashes[$i]->{$_}, "\n"; + } + # print "\n"; + return $val_cumeprob_href; +} + + +sub Kolmogorov_Smirnov_D{ +# get the maximum difference between two empirical cumulative distributions. +# Arguments are two hashrefs, each representing an empirical cumulative distribution. +# Each key is a data point (a real number), and the corresponding hash value is +# the proportion of data pts <= to it. So the largest value should be 1. + my $val_cumeprob1 = shift; + my $val_cumeprob2 = shift; + my @sorted_vals1 = sort {$a <=> $b} keys %{$val_cumeprob1}; + my @sorted_vals2 = sort {$a <=> $b} keys %{$val_cumeprob2}; + + my ($i1, $i2) = (0, 0); + + my $size1 = scalar @sorted_vals1; + my $size2 = scalar @sorted_vals2; + + my $D = 0; + while(1){ + my ($xlo1, $xhi1) = ($sorted_vals1[$i1], $sorted_vals1[$i1+1]); + my ($xlo2, $xhi2) = ($sorted_vals2[$i2], $sorted_vals2[$i2+1]); + die "$xlo1 > $xhi2 ??\n" if($xlo1 > $xhi2); + die "$xlo2 > $xhi1 ??\n" if($xlo2 > $xhi1); + + my ($cume_prob1, $cume_prob2) = ($val_cumeprob1->{$xlo1}, $val_cumeprob2->{$xlo2}); + my $abs_diff = abs($cume_prob1 - $cume_prob2); + $D = $abs_diff if($abs_diff > $D); + if($xhi1 <= $xhi2){ + $i1++; + }elsif($xhi2 <= $xhi1){ + $i2++; + }else{ + die "$xhi1 xhi2 should be numerical.\n"; + } + last if($i1 == $size1-1); + last if($i2 == $size2-1); + } + return $D; +} + +# operates on a newick of form (3,(6,4)) +# i.e. no whitespace, no branch lengths, ids must be numbers. +# so just parens, commas and numbers +# puts the leaves in order, such that at each node the +# subtree with smaller value is on left. The value of an +# internal node is the min of the values of the two child +# nodes, and the value of a leave is its id, which must be a number. +sub order_newick{ + my $newick = shift; + if ($newick =~ /^(\d+)$/) { # subtree is leaf! + return ($1, $newick); + } else { # subtree has > 1 leaf. + my %label_newick = (); + $newick =~ /^[(](.*)[)]$/; + my @newick_chars = split('',$1); # without surrounding () + my $lmr_paren_count = 0; + my ($il, $ir) = (0, 0); + my $n_chars = scalar @newick_chars; + my $min_label = 10000000; + foreach (@newick_chars) { + die "$_ ", $newick_chars[$ir], " not same!\n" if($_ ne $newick_chars[$ir]); + if ($_ eq '(') { + $lmr_paren_count++; + } + if ($_ eq ')') { + $lmr_paren_count--; + } + + if (($ir == $n_chars-1) or ($_ eq ',' and $lmr_paren_count == 0)) { #split + my $ilast = ($ir == $n_chars-1)? $ir : $ir-1; + my $sub_newick = join('', @newick_chars[$il..$ilast]); + # print "subnewick $sub_newick\n"; + my ($label, $ordered_subnewick) = order_newick($sub_newick); + $label_newick{$label} = $ordered_subnewick; + $min_label = min($min_label, $label); + $il = $ir+1; $ir = $il; # skip the ',' + } else { + $ir++; + } + } # loop over chars in @newick_chars + my $ordered_newick = ''; + foreach (sort {$a <=> $b} keys %label_newick) { + $ordered_newick .= $label_newick{$_} . ","; + } + $ordered_newick =~ s/,$//; + $ordered_newick = '(' . $ordered_newick . ')'; + return ($min_label, $ordered_newick); + } + die "shouldnt get here, in order_newick\n"; +} + + +1; + + diff --git a/lib/CXGN/Phylo/Node.pm b/lib/CXGN/Phylo/Node.pm index 2fb36430..01b69967 100644 --- a/lib/CXGN/Phylo/Node.pm +++ b/lib/CXGN/Phylo/Node.pm @@ -29,7 +29,7 @@ use CXGN::Phylo::Species_name_map; use CXGN::Phylo::Tree; use CXGN::Phylo::Label; use CXGN::Page::FormattingHelpers qw/tooltipped_text/; - + =head2 function new() @@ -686,12 +686,13 @@ sub get_standard_species { my $tree = $self->get_tree(); if (defined $tree) { my $species_standardizer = $tree->get_species_standardizer(); - if (defined $species_standardizer) { + if (defined $species_standardizer and defined ($species_standardizer->get_standard_name($species)) ) { $species = $species_standardizer->get_standard_name($species); - # print "species standardizer branch. species: $species \n"; + # print "species standardizer branch. species: [$species] \n"; } else { - $species = CXGN::Phylo::Species_name_map::to_standard_format($species); # just e.g. solanum lycopersicum -> Solanum_lycopersicum - # print "to_standard_format branch: $species \n"; +# print "species: [$species]\n"; + $species = CXGN::Phylo::Species_name_map->to_standard_format($species, 1); # just e.g. solanum lycopersicum -> Solanum_lycopersicum +# print "to_standard_format branch: [$species] \n"; } } return $species; @@ -999,13 +1000,14 @@ sub set_hidden { =head2 function recursive_propagate_properties() - Synopsis: - Arguments: + Synopsis: $root->recursive_propagate_properties('hidden'); + Arguments: A list of properties to be propagated. e.g. +'hidden', but if no list given then hidden and hilited are propagated. Returns: Side effects: Description: recursively propagates certain attributes to the children nodes. - currently, the propagated properties are: + currently, the default propagated properties are: hidden, hilited =cut @@ -1016,15 +1018,24 @@ sub recursive_propagate_properties { my $hidden = $self->get_hidden(); my $hilited = $self->get_hilited(); + my @properties = @_; + unless (@properties){ @properties = ('hidden', 'hilited') } + # print STDERR "X [", scalar @properties, "] ", join("; ", @properties), "\n"; my @children = $self->get_children(); foreach my $c (@children) { - if ($hidden) { + foreach my $prop (@properties){ + + if (($prop eq 'hidden') and $hidden) { $c->set_hidden($hidden); - } - if ($hilited) { + }elsif (($prop eq 'hilited') and $hilited) { +# print STDERR "In rec.propagate_properties.: ", $c->get_name(), " being set hilited\n"; $c->set_hilited($hilited); + }elsif(defined $self->get_attribute($prop)){ + $c->set_attribute($prop, $self->get_attribute($prop)); } - $c->recursive_propagate_properties(); + } +# print STDERR "Y [", scalar @properties, "] ", join("; ", @properties), "\n"; + $c->recursive_propagate_properties(@properties); } } @@ -1740,7 +1751,7 @@ sub recursive_set_implicit_species_bits{ my $bithash = shift; # this gives the bit pattern associated with each species my $species_bits = int 0; my $implicit_species = $self->get_implicit_species(); - # print "implicit species: ", join(" ", @$implicit_species), "\n"; +# print STDERR "\nZZZ. implicit species: ", join(" ", @$implicit_species), "\n"; my $a = int 0; my $b = int 0; foreach (@$implicit_species) { @@ -1749,6 +1760,7 @@ sub recursive_set_implicit_species_bits{ # print STDERR "impl species: [$_], [", $bithash->{$_}, "]\n"; } } +# print STDERR "In rec set impl species bits. nodename: ", $self->get_name(), " species bit pattern: [", $species_bits, "]\n\n"; $self->set_attribute("species_bit_pattern", $species_bits); foreach ($self->get_children()) { $_->recursive_set_implicit_species_bits($bithash); @@ -1909,11 +1921,11 @@ sub recursive_copy { =cut sub copy { - my $self = shift; + my $self = shift; # this is the node to be copied my $new_parent = shift; my $new_tree = shift; - my $new = CXGN::Phylo::Node->new(); + my $new = CXGN::Phylo::Node->new(); # the copy if ($new_parent) { $new_parent->add_child_node($new); @@ -1923,7 +1935,7 @@ sub copy { $new->set_tree($new_tree); $new->set_species($self->get_species()); $new->set_label($self->get_label()->copy()); - $new->set_node_key($self->get_node_key()); + $new->set_node_key($self->get_node_key()); # the copy gets the same node key as the original $new->set_hidden($self->get_hidden()); $new->set_hilited($self->get_hilited()); $new->set_horizontal_coord($self->get_horizontal_coord()); @@ -1934,7 +1946,8 @@ sub copy { $new->set_branch_length($self->get_branch_length()); $new->set_attribute("leaf_count", $self->get_attribute("leaf_count")); -$new->set_attribute("leaf_species_count", $self->get_attribute("leaf_species_count")); + $new->set_attribute("leaf_species_count", $self->get_attribute("leaf_species_count")); + $new->set_attribute("species_bit_pattern", $self->get_attribute("species_bit_pattern")); return $new; } @@ -2003,7 +2016,8 @@ sub make_newick_attributes { my $show_all = shift; my $string = ""; # print "In Node::make_newick_attributes. ", join("; ", $self->get_tree()->newick_shown_attributes() ), "\n"; - foreach my $attr ( $self->get_tree()->newick_shown_attributes() ) { + +foreach my $attr ( $self->get_tree()->newick_shown_attributes() ) { # print "in make_newick_attributes. attribute: $attr\n"; my $value = ""; if ($attr eq "species") { # species shown for leaves, or all nodes if $show_all @@ -2359,7 +2373,7 @@ sub recursive_subtree_node_list { =head2 function get_attribute() - Synopsis: my $foo = $node->get_attribte("foo"); + Synopsis: my $foo = $node->get_attribute("foo"); Arguments: the name of the attribute Returns: the value of the attribute named foo. Side effects: none @@ -3139,14 +3153,20 @@ print("in robinson-foulds, root bls: ", $root1->get_branch_length(), " ", $roo sub determine_species_from_name{ my $self = shift; my $str = shift; - my $species = undef; +# my $species = undef; if (!$str) { $str = $self->get_name(); } + my $id_taxon_map = $self->get_tree()->get_id_taxon_map(); + my $species = $id_taxon_map->id_to_taxonname($str); + # print STDERR "string to get species from: ", $str, "\n"; - if ($str =~ /^At/i) { # At.... is Arabidopsis - $species = "Arabidopsis"; +# if ($str =~ /^At/i) { # At.... is Arabidopsis +# $species = "Arabidopsis"; +# } + + if(defined $species){ + # just keep this value of $species } - # for $str of form SGN-U followed by digits,eliminate the SGN-U: elsif ($str =~ /^SGN-{0,1}U(\d+)/i) { # should we require SGN to be initial? @@ -3540,11 +3560,15 @@ sub recursive_compare_species_split{ my $self = shift; # a node of species tree, typically my $a1 = shift; my $a2 = shift; + + if($a1 == 0 or $a2 == 0){ return 0; }; # one subtree has no species found in species tree my @children = $self->get_children(); +# print "ZZZ [", scalar @children, "][", $self->get_attribute("species_bit_pattern"), "]  "; + return int 0 if(scalar @children < 2); # if reach leaf of species tree, no speciation. my $b1 = $children[0]->get_attribute("species_bit_pattern"); my $b2 = $children[1]->get_attribute("species_bit_pattern"); - if (($a1 & ~$b1) == 0) { # a1 species set is subset of b1 species set + if (($a1 & ~$b1) == 0) { # a1 (gene tree) species set is subset of b1 species set if (($a2 & ~$b2) == 0) { return int 1; } elsif (($a2 & ~$b1) == 0) { @@ -3571,8 +3595,11 @@ sub recursive_compare_species_split{ sub collect_orthologs_of_leaf{ my $self = shift; # the leaf to start at my @ortholog_array; + if ($self->get_attribute("species_bit_pattern") == 0){ return @ortholog_array; } my $prev_parent = $self; my $parent = $self->get_parent(); +# return @ortholog_array; +# print "XXX: [", $parent->get_attribute("speciation"), "] "; while (1) { if ($parent->get_attribute("speciation")) { #print join(";", $parent->get_implicit_names()), "\n"; @@ -3585,7 +3612,7 @@ sub collect_orthologs_of_leaf{ $n =~ s/(.*)?\|/$1/; } #print STDERR "implicit names: ", join(" ", @imp_names), "\n"; - @ortholog_array = (@ortholog_array, @imp_names); + @ortholog_array = (@ortholog_array, @imp_names) if ($self->get_attribute("species_bit_pattern") > 0); } } last if($parent->is_root()); @@ -3610,6 +3637,7 @@ sub recursive_collect_max_speciation_nodes{ } } +# just recursively set_hilited for each speciation node. sub recursive_hilite_speciation_nodes{ my $self = shift; # return; diff --git a/lib/CXGN/Phylo/Orthologger.pm b/lib/CXGN/Phylo/Orthologger.pm new file mode 100644 index 00000000..c6022f43 --- /dev/null +++ b/lib/CXGN/Phylo/Orthologger.pm @@ -0,0 +1,250 @@ +package CXGN::Phylo::Orthologger; +use strict; +use List::Util qw ( min max sum ); + +# use Devel::Cycle; # not gonna use this anymore? + +my $default_arg_href = { + 'reroot_method' => 'none', # choices are mindl maxmin minmax minvar + 'gene_tree' => undef, + 'gene_tree_newick' => undef, + 'species_tree' => undef, # tree object. + 'species_name_map' => undef, + 'query_species' => undef, + 'query_id_regex' => undef + }; + +sub new { + my $class = shift; + my $arg_href = shift; + + my $self = bless {}, $class; + + # initialize parameters to defaults. + foreach (keys %$default_arg_href) { + $self->{$_} = $default_arg_href->{$_}; + } + # reset any parameters specified in argument hash ref. + foreach (keys %$arg_href) { +# print STDERR "param => value: $_ => ", $arg_href->{$_}, "\n"; + $self->{$_} = $arg_href->{$_}; + } + $self->set_species_name_map(CXGN::Phylo::Species_name_map->new()) unless(defined $self->get_species_name_map()); + + # get the gene tree + die "Must supply either a gene tree object, or gene tree newick string.\n" + if (!defined $self->get_gene_tree() and !defined $self->get_gene_tree_newick()); + my $do_set_error = 0; + my $gene_tree; + if (defined $self->get_gene_tree()) { + # print "gene tree branch \n"; + $gene_tree = $self->get_gene_tree(); + } else { + my $gene_tree_newick = $self->get_gene_tree_newick(); + $gene_tree = CXGN::Phylo::Parse_newick->new($gene_tree_newick, $do_set_error)->parse(); + $gene_tree->show_newick_attribute('species'); + if (!$gene_tree) { + die "Gene tree. Parser_newick->parse() failed to return a tree object.\n Newick string: ". $gene_tree_newick . "\n"; + } + $self->set_gene_tree($gene_tree); + } + my $species_tree = $self->get_species_tree(); + # tweak the gene tree object: + $gene_tree->impose_branch_length_minimum(); + $gene_tree->get_root()->recursive_implicit_names(); + $gene_tree->get_root()->recursive_implicit_species(); + # if species is empty, (because no [species=x] in newick), get it from the name... + $gene_tree->set_missing_species_from_names(); + + $gene_tree->set_show_standard_species(1); + $gene_tree->update_label_names(); + $gene_tree->set_species_standardizer($self->get_species_name_map()); + + # now tweak species tree: + $species_tree->set_missing_species_from_names(); # get species from name if species undef + $species_tree->impose_branch_length_minimum(); + $species_tree->collapse_tree(); + $species_tree->get_root()->recursive_implicit_names(); + $species_tree->get_root()->recursive_implicit_species(); + + my $spec_bit_hash = $self->get_species_bithash(); + + $gene_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash); + $species_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash); + + + $species_tree->set_show_standard_species(1); + $species_tree->update_label_names(); + $species_tree->set_species_standardizer($self->get_species_name_map()); + +#print STDERR "in Orthogger->new. About to call reroot. \n"; + + $self->reroot(); + + $gene_tree->get_root()->recursive_set_leaf_species_count(); + $gene_tree->get_root()->recursive_set_leaf_count(); + $gene_tree->get_root()->recursive_implicit_species(); + $gene_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash); + + my $root_spec = $gene_tree->get_root()->speciation_at_this_node($species_tree); + if ($root_spec < 0) { + # if trifurcation at root, reroot to one of the neighboring branches + # if this will yield a speciation at root... + my $cn = ($gene_tree->get_root()->get_children())[$root_spec + 3]; + my $bl = $cn->get_branch_length(); + $gene_tree->reset_root_to_point_on_branch($cn, 0.5*$bl); + + $gene_tree->get_root()->recursive_implicit_species(); + $gene_tree->get_root()->recursive_set_implicit_species_bits($spec_bit_hash); + } + $gene_tree->get_root()->recursive_implicit_names(); + $gene_tree->get_root()->recursive_set_speciation($species_tree); + # $gene_tree->show_newick_attribute("species"); # should work now + $gene_tree->show_newick_attribute("speciation"); + return $self; +} # end of constructor + +sub reroot{ + my $self = shift; + my $gene_tree = $self->get_gene_tree(); + my $species_tree = $self->get_species_tree(); + my $reroot_method = $self->get_reroot_method(); + # print "top of reroot. reroot method: $reroot_method ; \n", $gene_tree->generate_newick(), "\n"; + my @new_root_point = (undef, undef); + if (defined $reroot_method and $reroot_method ne 'none') { + # reset root + if ($reroot_method eq "mindl") { # min duplicate & loss + # $gene_tree->set_branch_lengths_equal(0.0001); + + @new_root_point = $gene_tree->find_mindl_node($species_tree); + die "find_mindl_node failed\n" if(!defined $new_root_point[0]); + } elsif ($reroot_method eq "minvar") { # min variance + @new_root_point = $gene_tree->min_leaf_dist_variance_point(); + } elsif ($reroot_method eq "maxmin") { # max min; max over possible pts in tree (along branches) of min over nodes of pt-node distance. + @new_root_point = $gene_tree->find_point_furthest_from_leaves(); + } elsif ($reroot_method eq "minmax" or $reroot_method eq "midpoint") { # min max, aka midpoint (midpoint of longest leaf-leaf path) + @new_root_point = $gene_tree->find_point_closest_to_furthest_leaf(); + } + $gene_tree->reset_root_to_point_on_branch(@new_root_point); + $gene_tree->get_root()->recursive_implicit_names(); # needed after rerooting? + } + return $gene_tree; +} + +sub ortholog_result_string{ + my $self = shift; + my $gene_tree = $self->get_gene_tree(); + my $ortholog_str = ''; + my $leaf_ortholog_hashref = $gene_tree->leaf_ortholog_table(); + foreach my $leafname (keys %$leaf_ortholog_hashref) { + $ortholog_str .= "orthologs of " . $leafname . ": "; + my @orthologs = @{$leaf_ortholog_hashref->{$leafname}}; + $ortholog_str .= join(" ", @orthologs) . "\n"; + } + $ortholog_str .= "Leaves not in species tree: " . join(" ", keys %{$self->get_gene_tree()->non_speciestree_leafnode_names()}) . "\n"; + return $ortholog_str; +} # end of ortholog_result_string + +sub get_species_bithash{ # get a hash giving a bit pattern for each species which is in both $gene_tree and $spec_tree + my $self = shift; + return $self->get_gene_tree()->get_species_bithash($self->get_species_tree()); +} # end get_species_bithash + +sub get_expanded_subtrees{ + my $self = shift; + my $node = shift; + my $treename = shift || 'Default_tree_name'; + if (!defined $node->get_leaf_species_count()) { + $node->recursive_set_leaf_species_count(); + } + my $expansion_report_string = ''; + my $leaf_species_count = $node->get_attribute("leaf_species_count"); + my $leaf_count = $node->get_attribute("leaf_count"); + + if (($leaf_species_count==1) && ($leaf_count>1)) { + my $sp = $node->recursive_get_a_leaf()->get_standard_species(); + $expansion_report_string = "tree $treename has expansion of $leaf_count for species $sp\n"; + return $expansion_report_string; + } + foreach my $c ($node->get_children()) { + $expansion_report_string .= $self->get_expanded_subtrees($c, $treename); + } + return $expansion_report_string; +} + +sub decircularize{ +# call before Orthologger obj goes out of scope +# so it can be garbage collected, to avoid memory leak. +my $self = shift; +$self->set_species_tree(undef); +$self->get_gene_tree()->decircularize(); +$self->set_gene_tree(undef); +} + + +#accessors: + +sub get_gene_tree_newick{ + my $self = shift; + return $self->{gene_tree_newick}; +} +sub set_gene_tree_newick{ + my $self = shift; + $self->{gene_tree_newick} = shift; +} + +sub get_gene_tree{ + my $self = shift; + return $self->{gene_tree}; +} +sub set_gene_tree{ + my $self = shift; + $self->{gene_tree} = shift; +} + +sub get_species_tree{ + my $self = shift; + return $self->{species_tree}; +} +sub set_species_tree{ + my $self = shift; + $self->{species_tree} = shift; +} + +sub get_reroot_method{ + my $self = shift; + return $self->{reroot_method}; +} +sub set_reroot_method{ + my $self = shift; + $self->{reroot_method} = shift; +} + +sub get_species_name_map{ + my $self = shift; + return $self->{species_name_map}; +} +sub set_species_name_map{ + my $self = shift; + $self->{species_name_map} = shift; +} + +sub get_query_species{ + my $self = shift; + return $self->{query_species}; +} +sub set_query_species{ + my $self = shift; + $self->{query_species} = shift; +} + +sub get_query_id_regex{ + my $self = shift; + return $self->{query_id_regex}; +} +sub set_query_id_regex{ + my $self = shift; + $self->{query_id_regex} = shift; +} + +1; diff --git a/lib/CXGN/Phylo/Overlap.pm b/lib/CXGN/Phylo/Overlap.pm new file mode 100644 index 00000000..364722ee --- /dev/null +++ b/lib/CXGN/Phylo/Overlap.pm @@ -0,0 +1,228 @@ +package Overlap; +use strict; +use List::Util qw ( min max sum ); + +my $NO_ID = 'NOT_AN_ACTUAL_ID'; + +sub new { + my $class = shift; + my $arg = shift; # either filename or string with contents of fasta file + my $fraction = shift || 0.8; + my $seed = shift || undef; + my $args= {}; + my $self = bless $args, $class; + + if (defined $seed) { + srand($seed); + } else { + srand(); + } + my @ids = (); + my %id_overlapseq = (); + my %id_sequence = (); + my @lines = (); +# print "arg: $arg\n"; +# print "arg is file? ", -f $arg, "\n"; +# print "XXX: ", $arg =~ /\n/, "\n"; + # if ((! $arg =~ /\n/) and + if (!($arg =~ /\n/) and -f $arg) { # $arg is filename + open my $fhin, "<$arg"; + @lines = <$fhin>; +# print "XXXXXXXlines: ", join("\n", @lines), "\n"; + close $fhin; + + } else { # treat arg as string + @lines = split("\n", $arg); + } + my $id = $NO_ID; + my $sequence = ''; + while (@lines) { + my $line = shift @lines; + # print $line; + if ($line =~ /^>/) { + if ($id ne $NO_ID) { + $id_sequence{$id} = $sequence; + push @ids, $id; + } + $id = $line; + $id =~ s/^>\s*//; + $id =~ s/\s+$//; + $sequence = ''; + } else { + $line =~ s/^\s+//; + $line =~ s/\s+$//; + $sequence .= $line; + } + } + if (! exists $id_sequence{$id} and $sequence ne '') { # take care of the last id-sequence pair. + $id_sequence{$id} = $sequence; + push @ids, $id; + } + $self->{id_seq} = \%id_sequence; + $self->{ids} = \@ids; + + my $n_sequences = scalar @ids; + my $seq_length = length $id_sequence{$ids[0]}; + $self->{align_length} = $seq_length; + $self->{n_sequences} = $n_sequences; + my @position_counts = ((0) x $seq_length); + my @position_aas = (('') x $seq_length); + foreach my $id (@ids) { + my $sequence = $id_sequence{$id}; + my $seql = length $sequence; + die "Non-equal sequence lengths in alignment: $seq_length, $seql. id: $id \nsequence: $sequence\n" if($seql ne $seq_length); + + for (my $i = 0; $i < $seq_length; $i++) { + my $aa = substr($sequence, $i, 1); + if ($aa ne '-') { + $position_counts[$i]++; + if(!($position_aas[$i] =~ /$aa/)){ + $position_aas[$i] .= $aa; # not invariant + } + } + } + } + my $n_invariant = 0; + foreach (@position_aas){ + # print "[$_]\n"; + $n_invariant++ if(length $_ == 1); + } +# print "n_invariant: $n_invariant align length: ", scalar @position_aas, "\n"; +# print "pinv: ", $n_invariant/$seq_length, "\n"; + $self->{position_counts} = \@position_counts; + my $n_required = ($fraction >= 1)? $n_sequences: int ($fraction * $n_sequences) + 1; + $self->{n_required} = $n_required; + my $overlap_length = 0; + my %id_overlapnongapcount = (); +my $overlap_n_invariant = 0; + foreach my $position (0..@position_counts-1) { + my $count = $position_counts[$position]; + if ($count >= $n_required) { + $overlap_length++; + foreach my $id (@ids) { + my $char = substr($id_sequence{$id}, $position, 1); + $id_overlapseq{$id} .= $char; + $id_overlapnongapcount{$id}++ if($char ne '-'); + } + $overlap_n_invariant++ if(length $position_aas[$position] == 1); + } + } +# print "overlap n_invariant: $overlap_n_invariant, length: $overlap_length\n"; +# print "overlap pinv: ", $overlap_n_invariant/$overlap_length, "\n"; + + $self->{id_overlapseq} = \%id_overlapseq; + $self->{id_overlapnongapcount} = \%id_overlapnongapcount; + die "overlap length inconsistency??? $overlap_length \n" if($overlap_length != length $id_overlapseq{$ids[0]}); + $self->{overlap_length} = $overlap_length; + # $self->{ids} = \@ids; + + return $self; +} + + +sub weed_sequences{ + # weed out sequences which have poor overlap with others + my $self = shift; + my $fraction = shift || 0.3; + my $min_nongapcount = int($fraction * $self->{overlap_length}); + #my %id_overlap_count = (); # + my @ids = $self->{id_overlapnongapcount}; + foreach (@ids) { + if ($self->{id_overlapnongapcount}->{$_} < $min_nongapcount) { + # delete this sequence + delete $self->{id_overlapseq}->{$_}; + delete $self->{id_overlapnongapcount} + } + } +} + + +sub align_fasta_string{ + my $self = shift; + my $spacer = shift || ''; + my $align_fasta = ''; + foreach my $id (@{$self->{ids}}) { + my $sequence = $self->{id_seq}->{$id}; + $align_fasta .= ">$spacer$id\n$sequence\n"; + } + chomp $align_fasta; + return $align_fasta; +} + +sub overlap_fasta_string{ + my $self = shift; + my $spacer = shift || ''; + my $overlap_fasta = ''; + foreach my $id (@{$self->{ids}}) { + my $sequence = $self->{id_overlapseq}->{$id}; + $overlap_fasta .= ">$spacer$id\n$sequence\n"; + } + chomp $overlap_fasta; + return $overlap_fasta; +} + +sub overlap_nexus_string{ # basic nexus format string for use by MrBayes. + my $self = shift; + my $n_leaves = scalar @{$self->{ids}}; + my $overlap_length = length ($self->{id_overlapseq}->{$self->{ids}->[0]}); + my $nexus_string = "#NEXUS\n" . "begin data;\n"; + $nexus_string .= "dimensions ntax=$n_leaves nchar=$overlap_length;\n"; + $nexus_string .= "format datatype=protein interleave=no gap=-;\n"; + $nexus_string .= "matrix\n"; + + foreach my $id (@{$self->{ids}}) { + my $sequence = $self->{id_overlapseq}->{$id}; + $id =~ s/[|].*//; +#print "id, nexid: $id $nexid \n"; + + my $id50 = $id . " "; + $id50 = substr($id50, 0, 50); + $nexus_string .= "$id50$sequence\n"; + } + $nexus_string .= "\n;\n\n" . "end;\n"; + return $nexus_string; +} + +sub bootstrap_overlap_fasta_string{ + my $self = shift; + my $spacer = shift || ''; + my %id_bootstrapoverlapseq = (); + my $overlap_length = $self->{overlap_length}; + + my @indices = (); + for (1..$overlap_length) { + my $index = int( rand($overlap_length) ); + push @indices, $index; + # $index_count{$index}++; + } + + for my $id (@{$self->{ids}}) { + my $std_overlap = $self->{id_overlapseq}->{$id}; + my $string = ''; + foreach my $index (@indices) { + $string .= substr($std_overlap, $index, 1); + } + $id_bootstrapoverlapseq{$id} = $string; + } + + my $bofstring = ''; + foreach my $id (@{$self->{ids}}) { + # print "id, seq: $id; $sequence\n"; + my $sequence = $id_bootstrapoverlapseq{$id}; + $bofstring .= ">$spacer$id\n$sequence\n"; + } + chomp $bofstring; + return $bofstring; +} + +sub get_overlap_length{ + my $self = shift; + return $self->{overlap_length}; +} +sub set_overlap_length{ + my $self = shift; + $self->{overlap_length} = shift; +} + + +1; diff --git a/lib/CXGN/Phylo/Parser.pm b/lib/CXGN/Phylo/Parser.pm index 81d4592c..72d07379 100644 --- a/lib/CXGN/Phylo/Parser.pm +++ b/lib/CXGN/Phylo/Parser.pm @@ -112,6 +112,7 @@ $string = $1; for (my $i=0; $i<@tokens; $i++) { $self->set_error(\@tokens, $i) if($self->{do_set_error}); my $t = $tokens[$i]; + next unless ($t =~ /\S/); # skip tokens with only whitespace if ($t eq "(") { #print STDERR "Encountered (. Creating a new child node [parent=".$current_node->get_name()."\n"; my $child = $current_node->add_child(); diff --git a/lib/CXGN/Phylo/Renderer.pm b/lib/CXGN/Phylo/Renderer.pm index ab55806e..6d32aaa0 100644 --- a/lib/CXGN/Phylo/Renderer.pm +++ b/lib/CXGN/Phylo/Renderer.pm @@ -1,772 +1,791 @@ -=head1 Package CXGN::Phylo::Text_tree_renderer - -Abstract_tree_renderer - an interface to define tree renderers - -=head1 DESCRIPTION - -=head1 AUTHOR - -Lukas Mueller (lam87@cornell.edu) - -=cut - -=head1 Package Abstract_tree_renderer - -This class essentially defines an interface for tree renderers. -It contains two functions: new() and render(). -Additional functions can be defined as necessary in the derived classes. - -=cut - -package CXGN::Phylo::Abstract_tree_renderer; - -use GD; -GD::Image->trueColor(1); - -=head2 function new() - - Synopsis: abstract class from which tree renderer can be implemented. - Arguments: - Returns: - Side effects: - Description: - -=cut - -sub new { - my $class = shift; - my $tree = shift; - my $args = {}; - my $self = bless $args, $class; - - # THE FOLLOWING WAS MOVED TO THE TREE OBJECT: - # set a default layout object, which is Layout - # (the tree is rendered from left to right) - # - #my $layout_object = CXGN::Phylo::Layout->new($tree); - #$self->set_layout($layout_object); - - $self->set_tree($tree); - return $self; -} - -sub set_tree { - my $self = shift; - $self->{tree}=shift; -} - -sub get_tree { - my $self = shift; - return $self->{tree}; -} - -# =head2 function get_layout() - -# Synopsis: -# Arguments: -# Returns: -# Side effects: -# Description: Accessor for the layout object for this renderer. - -# =cut - -# sub get_layout { -# my $self=shift; -# return $self->{layout}; -# } - -# =head2 function set_layout() - -# Synopsis: -# Arguments: an instance of a Layout object or a subclass thereof. -# Returns: -# Side effects: the layout function determines how the tree is laid out, -# the standard way is the root node is placed on the left -# and the children on the right. Other layout objects will -# lay out trees with other orientations. -# Description: setter function for the layout object for this renderer. - -# =cut - -# sub set_layout { -# my $self=shift; -# $self->{layout}=shift; -# } - - - - -sub render { - die "You are attempting to use the abstract class Abstract_tree_renderer. Please subclass.\n"; -} - -package CXGN::Phylo::Text_tree_renderer; - -use base qw/ CXGN::Phylo::Abstract_tree_renderer /; - -sub new { - my $class = shift; - my $self = $class->SUPER::new(@_); - return $self; -} - -=head2 function render() - - Synopsis: - Arguments: - Returns: - Side effects: - Description: a simple rendering of the tree, output to the STDOUT. - -=cut - -sub render { - my $self = shift; - $self->get_tree()->get_root()->recursive_text_render(); -} - -=head1 Package CXGN::Phylo::PNG_tree_renderer - -Renders tree as PNG - -=cut - -package CXGN::Phylo::PNG_tree_renderer; - -use base qw/ CXGN::Phylo::Abstract_tree_renderer /; - -=head2 function new() - - Synopsis: - Arguments: a tree object - Returns: An instance of a PNG_tree_renderer object. - Side effects: Sets some defaults for drawing the tree. - These can be overridden by calling the corresponding - setter functions. - Description: - -=cut - -sub new { - my $class = shift; - my $self = $class -> SUPER::new(@_); - $self->set_font(GD::Font->Small()); - $self->set_transparent(0); - - $self->{bl_labels} = []; #need to keep array of these for image map - - return $self; -} - -=head2 function get_font() - - Synopsis: - Arguments: - Returns: - Side effects: - Description: - -=cut - -sub get_font { - my $self=shift; - return $self->{font}; -} - -=head2 function set_font() - - Synopsis: $r->set_font($font) - Arguments: a GD font - Returns: nothing - Side effects: $font will be used for rendering the tree labels - Description: - -=cut - -sub set_font { - my $self=shift; - $self->{font}=shift; -} - -=head2 function get_font_width - - Synopsis: - Arguments: - Returns: - Side effects: - Description: - -=cut - -sub get_font_width { - my $self=shift; - return $self->get_font()->width(); -} - -=head2 function get_font_height - - Synopsis: - Arguments: - Returns: - Side effects: - Description: - -=cut - -sub get_font_height { - my $self=shift; - return $self->get_font()->height(); -} - -=head2 function get_show_branch_length() - - Synopsis: - Arguments: - Returns: - Side effects: - Description: - -=cut - -sub get_show_branch_length { - my $self=shift; - return $self->{show_branch_length}; -} - -=head2 function set_show_branch_length() - - Synopsis: - Arguments: - Returns: - Side effects: - Description: - -=cut - -sub set_show_branch_length { - my $self=shift; - $self->{show_branch_length}=shift; -} - -sub hide_alignment { - my $self = shift; - $self->{hide_alignment} = 1; -} - -sub show_alignment { - my $self = shift; - $self->{hide_alignment} = 0; -} - -sub set_transparent { - my $self = shift; - $self->{transparent} = shift; -} - -sub get_transparent { - my $self = shift; - return $self->{transparent}; -} - -=head2 function render() - - Synopsis: - Arguments: (optional) a boolean for printing labels for all nodes (default- label is printed only for leaves, unless get_hide_label() = true ) - Returns: a png image - Side effects: creates a GD::Image object and renders the tree on it. - Description: draws the tree - -=cut - -sub render { - my $self = shift; - my $print_all_labels=shift; -# GD::Image->trueColor(1); - # initialize image - my $tree = $self->get_tree; - my $layout = $tree->get_layout; - $layout->layout(); - my ($phylo_width, $phylo_height) = ($layout->get_image_width, $layout->get_image_height); - - - my $image = GD::Image->new($phylo_width,$phylo_height, 1); - my $white = $image->colorResolve(255,255,255); -# if($self->{transparent}){ -# $image->transparent($white); -# } - $image->filledRectangle(0 , 0, $phylo_width, $phylo_height, $white); - $tree->get_root()->recursive_propagate_properties(); - # get the font - # - my $font = GD::Font->Small(); - if ($layout->get_vertical_gap()<12) { - $font= GD::Font->Tiny(); - } - $self->set_font($font); - # percolate font information to all nodes - foreach my $n ($tree->get_all_nodes()) { - $n->get_label()->set_font($self->get_font); - } - - my $color = $image->colorResolve($tree->get_line_color()); - - if ($tree->get_alignment && !$self->{hide_alignment}) { - my $alignment = $tree->get_alignment; - my $partition_width; - if($alignment->{label_shown}){ - $partition_width = $phylo_width/3; - } - else { - $partition_width = $phylo_width/2; - } - print STDERR "Showing Alignment...\n"; - - #Calculate node coordinates: - $layout->set_image_width($partition_width); - $layout->layout(); - - #Set Alignment Image attributes - - - $alignment->set_image($image); - $alignment->set_display_type("alignment"); #alignment and ruler only - $alignment->set_left_margin($partition_width+5); - my $label_spacer = 5; - my $gap = $label_spacer; -# if($alignment->{label_shown}){ -# $gap = $alignment->_calculate_label_gap + $label_spacer; -# } - $alignment->set_width($partition_width); - $alignment->set_height($layout->get_image_height); - $alignment->_add_ruler(); - $alignment->{width_adjustment} = 0; - $alignment->_add_ruler(); - $alignment->{ruler}->set_width($partition_width-$gap); - $alignment->{ruler}->set_label_spacer($label_spacer); - $alignment->{ruler}->hide_unit(); - $alignment->{ruler}->set_top_margin($alignment->{ruler}->{font}->height()); - - my %member_is_shown = (); - #Position each alignment member - foreach my $leaf ($tree->get_leaf_list()) { - my $m = $leaf->get_alignment_member; - next unless $m; - $m->set_width($partition_width-$gap); - $m->set_label_spacer($label_spacer); - my $height = $layout->get_vertical_gap; -# $height = 20 if($height>20); - $m->set_height($height); - $m->set_top_margin($leaf->get_Y); - my $id = $m->get_id; - $member_is_shown{$id} = 1; - } - - foreach my $m (@{$alignment->{members}}){ - my $id = $m->get_id; - $m->hide_seq unless $member_is_shown{$id}; - } - - #Render the alignment on the set image - $alignment->render(); - } - $self->recursive_draw($tree->get_root(), $image, $color, "", $print_all_labels ); - return $image->png(); -} - - - -sub recursive_draw { - my $self = shift; - my $node = shift; - my $image = shift; - my $color = shift; - my $parent_hilited = shift; - my $print_all_labels=shift; - - my $line_color = $color; - - my @children = $node->get_children(); - + +=head1 Package CXGN::Phylo::Text_tree_renderer + +Abstract_tree_renderer - an interface to define tree renderers + +=head1 DESCRIPTION + +=head1 AUTHOR + +Lukas Mueller (lam87@cornell.edu) + +=cut + +=head1 Package Abstract_tree_renderer + +This class essentially defines an interface for tree renderers. +It contains two functions: new() and render(). +Additional functions can be defined as necessary in the derived classes. + +=cut + +package CXGN::Phylo::Abstract_tree_renderer; + +use List::Util qw ( min max sum ); +use GD; + +GD::Image->trueColor(1); + +=head2 function new() + + Synopsis: abstract class from which tree renderer can be implemented. + Arguments: + Returns: + Side effects: + Description: + +=cut + +sub new { + my $class = shift; + my $tree = shift; + my $args = {}; + my $self = bless $args, $class; + + # THE FOLLOWING WAS MOVED TO THE TREE OBJECT: + # set a default layout object, which is Layout + # (the tree is rendered from left to right) + # + #my $layout_object = CXGN::Phylo::Layout->new($tree); + #$self->set_layout($layout_object); + + $self->set_tree($tree); + return $self; +} + +sub set_tree { + my $self = shift; + $self->{tree} = shift; +} + +sub get_tree { + my $self = shift; + return $self->{tree}; +} + +# =head2 function get_layout() + +# Synopsis: +# Arguments: +# Returns: +# Side effects: +# Description: Accessor for the layout object for this renderer. + +# =cut + +# sub get_layout { +# my $self=shift; +# return $self->{layout}; +# } + +# =head2 function set_layout() + +# Synopsis: +# Arguments: an instance of a Layout object or a subclass thereof. +# Returns: +# Side effects: the layout function determines how the tree is laid out, +# the standard way is the root node is placed on the left +# and the children on the right. Other layout objects will +# lay out trees with other orientations. +# Description: setter function for the layout object for this renderer. + +# =cut + +# sub set_layout { +# my $self=shift; +# $self->{layout}=shift; +# } + +sub render { + die "You are attempting to use the abstract class Abstract_tree_renderer. Please subclass.\n"; +} + +package CXGN::Phylo::Text_tree_renderer; + +use base qw/ CXGN::Phylo::Abstract_tree_renderer /; + +sub new { + my $class = shift; + my $self = $class->SUPER::new(@_); + return $self; +} + +=head2 function render() + + Synopsis: + Arguments: + Returns: + Side effects: + Description: a simple rendering of the tree, output to the STDOUT. + +=cut + +sub render { + my $self = shift; + $self->get_tree()->get_root()->recursive_text_render(); +} + +=head1 Package CXGN::Phylo::PNG_tree_renderer + +Renders tree as PNG + +=cut + +package CXGN::Phylo::PNG_tree_renderer; + +use base qw/ CXGN::Phylo::Abstract_tree_renderer /; + +=head2 function new() + + Synopsis: + Arguments: a tree object + Returns: An instance of a PNG_tree_renderer object. + Side effects: Sets some defaults for drawing the tree. + These can be overridden by calling the corresponding + setter functions. + Description: + +=cut + +sub new { + my $class = shift; + my $self = $class->SUPER::new(@_); + $self->set_font( GD::Font->Small() ); + $self->set_transparent(0); + + $self->{bl_labels} = []; #need to keep array of these for image map + + return $self; +} + +=head2 function get_font() + + Synopsis: + Arguments: + Returns: + Side effects: + Description: + +=cut + +sub get_font { + my $self = shift; + return $self->{font}; +} + +=head2 function set_font() + + Synopsis: $r->set_font($font) + Arguments: a GD font + Returns: nothing + Side effects: $font will be used for rendering the tree labels + Description: + +=cut + +sub set_font { + my $self = shift; + $self->{font} = shift; +} + +=head2 function get_font_width + + Synopsis: + Arguments: + Returns: + Side effects: + Description: + +=cut + +sub get_font_width { + my $self = shift; + return $self->get_font()->width(); +} + +=head2 function get_font_height + + Synopsis: + Arguments: + Returns: + Side effects: + Description: + +=cut + +sub get_font_height { + my $self = shift; + return $self->get_font()->height(); +} + +=head2 function get_show_branch_length() + + Synopsis: + Arguments: + Returns: + Side effects: + Description: + +=cut + +sub get_show_branch_length { + my $self = shift; + return $self->{show_branch_length}; +} + +=head2 function set_show_branch_length() + + Synopsis: + Arguments: + Returns: + Side effects: + Description: + +=cut + +sub set_show_branch_length { + my $self = shift; + $self->{show_branch_length} = shift; +} + +sub hide_alignment { + my $self = shift; + $self->{hide_alignment} = 1; +} + +sub show_alignment { + my $self = shift; + $self->{hide_alignment} = 0; +} + +sub set_transparent { + my $self = shift; + $self->{transparent} = shift; +} + +sub get_transparent { + my $self = shift; + return $self->{transparent}; +} + +=head2 function render() + + Synopsis: + Arguments: (optional) a boolean for printing labels for all nodes (default- label is printed only for leaves, unless get_hide_label() = true ) + Returns: a png image + Side effects: creates a GD::Image object and renders the tree on it. + Description: draws the tree + +=cut + +sub render { + my $self = shift; + my $print_all_labels = shift; + + # initialize image + my $tree = $self->get_tree; + my $layout = $tree->get_layout; + $layout->layout(); + my ( $phylo_width, $phylo_height ) = ( $layout->get_image_width, $layout->get_image_height ); + + my $image = GD::Image->new( $phylo_width, $phylo_height, 1 ); + my $white = $image->colorResolve( 255, 255, 255 ); + + $image->filledRectangle( 0, 0, $phylo_width, $phylo_height, $white ); + $tree->get_root()->recursive_propagate_properties('hidden'); + + # get the font + # + my $font = GD::Font->Small(); + if ( $layout->get_vertical_gap() < 12 ) { + $font = GD::Font->Tiny(); + } + $self->set_font($font); + + # percolate font information to all nodes + foreach my $n ( $tree->get_all_nodes() ) { + $n->get_label()->set_font( $self->get_font ); + } + + my $color = $image->colorResolve( $tree->get_line_color() ); + + if ( $tree->get_alignment && !$self->{hide_alignment} ) { + my $alignment = $tree->get_alignment; + my $partition_width; + if ( $alignment->{label_shown} ) { + $partition_width = $phylo_width / 3; + } else { + $partition_width = $phylo_width / 2; + } + print STDERR "Showing Alignment...\n"; + + #Calculate node coordinates: + $layout->set_image_width($partition_width); + $layout->layout(); + + #Set Alignment Image attributes + $alignment->set_image($image); + $alignment->set_display_type("alignment"); #alignment and ruler only + $alignment->set_left_margin( $partition_width + 5 ); + my $label_spacer = 5; + my $gap = $label_spacer; + + $alignment->set_width($partition_width); + $alignment->set_height( $layout->get_image_height ); + $alignment->_add_ruler(); + $alignment->{width_adjustment} = 0; + $alignment->_add_ruler(); + $alignment->{ruler}->set_width( $partition_width - $gap ); + $alignment->{ruler}->set_label_spacer($label_spacer); + $alignment->{ruler}->hide_unit(); + $alignment->{ruler}->set_top_margin( $alignment->{ruler}->{font}->height() ); + + my %member_is_shown = (); + + #Position each alignment member + foreach my $leaf ( $tree->get_leaf_list() ) { + my $m = $leaf->get_alignment_member; + next unless $m; + $m->set_width( $partition_width - $gap ); + $m->set_label_spacer($label_spacer); + my $height = $layout->get_vertical_gap; + + # $height = 20 if($height>20); + $m->set_height($height); + $m->set_top_margin( $leaf->get_Y ); + my $id = $m->get_id; + $member_is_shown{$id} = 1; + } + + foreach my $m ( @{ $alignment->{members} } ) { + my $id = $m->get_id; + $m->hide_seq unless $member_is_shown{$id}; + } + + #Render the alignment on the set image + $alignment->render(); + } + $self->recursive_draw( $tree->get_root(), $image, $color, $print_all_labels ); + return $image->png(); +} + +sub recursive_draw { + my $self = shift; + my $node = shift; + my $image = shift; + my $color = shift; + # my $parent_hilited = shift; # why is this needed ???? + my $print_all_labels = shift; + + my $line_color = $color; + + my @children = $node->get_children(); + #print STDERR "Node: ".$node->get_name()." X: ".$node->get_X()." Y: ".$node->get_Y()."\n"; - - # generate the color for the clickable area of the nodes - # - my $node_color = $image->colorResolve(150,150,250); # color for normal (non-hilited) nodes - - # get the hilite color for the hilited nodes - # -# print STDERR "name: ", $node->get_name(), "node implicit names:[", join(":", $node->get_implicit_names()), "] speciation:[", $node->get_attribute("speciation"), "] "; - if ($node->get_hilited() ){ #or $node->get_attribute("speciation") ) { - $color = $image->colorResolve($self->get_tree()->get_hilite_color()); # this is the color of the hilited subtree - if (!$parent_hilited or $node->get_attribute("speciation")) { - #Original Hilited Node - my @hcolor = $self->get_tree()->get_hilite_color(); - my $max = 0; - #Saturate the predominant color component - foreach (@hcolor) { - $max = $_ if $_>=$max; - } - foreach (@hcolor) { - if ($_>=$max) { - $_ = 255; - } else { - $_ *= 0.6; - } - } - $node_color = $image->colorResolve(@hcolor); # color of root of hilited subtree (bright red) - } else { - $node_color = $color; # color of rest of hilited subtree (dull red) - } - } - - if ($node->is_root()) { - my @line_color = $node->get_line_color(); - # print STDERR "line color: ", join(";", @line_color), "\n"; - $line_color = $image->colorResolve(@line_color) if (@line_color==3); - my $model = $node->get_attribute("model"); - if ($model) { - my $color_array = $self->get_model_color($model); - $line_color = $image->colorResolve(@$color_array) if (ref($color_array) eq "ARRAY"); - } - $line_color = $color if $node->get_hilited(); - $self->connect_nodes(undef, $node, $image, $line_color); - } - - unless($node->get_hidden()){# (don't draw children of hidden nodes) - foreach my $c (@children) { - my @line_color = $c->get_line_color(); - $line_color = $image->colorResolve(@line_color) if (@line_color==3); - my $model = $c->get_attribute("model"); - if ($model) { - my $color_array = $self->get_model_color($model); - $line_color = $image->colorResolve(@$color_array) if (ref($color_array) eq "ARRAY"); - } - $line_color = $color if $node->get_hilited(); - # print STDERR "line color: ", join(";", @line_color), "\n"; - $self->connect_nodes($node, $c, $image, $line_color); - $self->recursive_draw($c, $image, $color,$node->get_hilited(), $print_all_labels); - } - } - - if (!($node->is_leaf() || ($node->is_hidden()))) { - $image -> filledRectangle(($node->get_X()-2), ($node->get_Y()-2), - ($node->get_X()+2), ($node->get_Y+2), $node_color); - } elsif ($node->is_leaf()) { - $image -> filledArc(($node->get_X()), ($node->get_Y()), 7, 7, 0, 360, $node_color); - # my $label = $node->get_label; - # my $layout = $self->get_tree()->get_layout(); - # my $begin_x = $label->{font}->width()*length($label->get_name()) + $node->get_X + 5; - # $begin_x = int($begin_x); - # $image->line($begin_x, $node->get_Y, $begin_x+50, $node->get_Y, $node_color); - - } - - # if the node is hidden, draw the hidden symbol - # - if ($node->get_hidden()) { - $image -> rectangle(($node->get_X()-3), ($node->get_Y()-3), - ($node->get_X()+3), ($node->get_Y()+3), $color); - - $image -> line(($node->get_X()), ($node->get_Y()-2), - ($node->get_X()), ($node->get_Y()+2), $color); - $image -> line(($node->get_X()-2), ($node->get_Y()), - ($node->get_X()+2), ($node->get_Y()), $color); - } - - # draw the label if it it visible - # - if ( (!$node->get_hide_label() and $node->is_leaf()) || $print_all_labels ) { - $node->get_label()->set_reference_point(($node->get_X()), ($node->get_Y())); - if (!$node->is_leaf() && $node->get_children() % 2 > 0) { - $node->get_label()->set_reference_point(($node->get_X()), ($node->get_Y()-1-$node->get_label()->get_font()->height/2)); - } - $node->get_label()->set_orientation_horizontal(); - $node->get_label()->render($image); - } -} - -=head2 function connect_nodes() - - Synopsis: - Arguments: a parent node object, a child node object, a GD image object and a GD color - Returns: - Side effects: draws the connection between a parent and a child node. - Description: the default style is a connection that is broken into to perpendicular - lines. This can be overridden in subclasses to draw different - styles of trees - -=cut - -sub connect_nodes { - my $self = shift; - my $p_node = shift; - my $c_node = shift; - my $image = shift; - my $color = shift; - - if (!$p_node) { - $p_node=$c_node->copy(); - $p_node->set_X($self->get_tree()->get_layout()->get_left_margin()); - } - $self->display_branch_length($image, $p_node, $c_node, $color); - $image->setAntiAliased($color); - $image -> line ($p_node->get_X(), $p_node->get_Y(), - $p_node->get_X(), $c_node->get_Y(), $color); - - $image -> line ($p_node->get_X(), $c_node->get_Y(), - $c_node->get_X(), $c_node->get_Y(), $color); - -} - -sub display_branch_length { - my $self = shift; - - # if we are supposed to display the branch lengths, we do so... - return unless $self->get_show_branch_length(); - - my $image = shift; - my $p_node = shift; - my $c_node = shift; - my $color = shift; - - - my $label_text = $c_node->get_branch_length(); - my $branch_length_label = CXGN::Phylo::Label->new($label_text); - - - my @color = (200, 200, 200); - my $blc_sum = 300; # the branch length color will be the branch color normalized s.t. sum of r,g and b = blc_sum; so make this smaller if branch lengths are too light to read easily - if ($color) { - @color = $image->rgb($color); - my $bc_sum = ($color[0] + $color[1] + $color[2]); - my $norm_factor = ($bc_sum == 0)? 1: $blc_sum/$bc_sum; - $norm_factor = 1 if($norm_factor > 1); - @color = map { $_*$norm_factor } @color; #branch color normalized to have sum of r,g,b=tsum - - my $model = $c_node->get_attribute("model"); - if ($model =~ /\w+/) { - $branch_length_label->set_tooltip("Calculated by " . ucfirst($model) . " model"); - } - } - - $branch_length_label->set_reference_point( - $self->get_blen_ref_pt($p_node, $c_node, $label_text) - ); - $branch_length_label->set_font(GD::Font->Tiny()); - $branch_length_label->set_hidden(0); - $branch_length_label->align_center(); - $branch_length_label->set_text_color(@color); - - $branch_length_label->render($image); - - push(@{$self->{bl_labels}}, $branch_length_label); -} - -sub get_blen_ref_pt { - my $self = shift; - my $p_node = shift; - my $c_node = shift; -my $bl_scaled = ($c_node->get_transformed_branch_length($self->get_tree()->get_shown_branch_length_transformation()) * $self->get_tree()->get_layout()->get_horizontal_scaling_factor()); - return ( - $c_node->get_X()-$bl_scaled/2, - $c_node->get_Y()-$self->get_font_height()/2 - ); -} - - -sub hilite_model { - my $self = shift; - my $model = shift; - my $color_array = shift; - $self->{model_colors}->{$model} = $color_array; -} - -sub get_model_color { - my $self = shift; - my $model = shift; - my $color_array = $self->{model_colors}->{$model}; - return unless (ref($color_array) eq "ARRAY"); - return $color_array if (@$color_array == 3); -} - -=head2 function get_html_image_map() - - Synopsis: my $image_map=$renderer->get_html_image_map($name, $temp_filename, $hilite_temp, $align_type ); - Arguments: image_map name, filename - Returns: a html imagemap that can be used to embed links into the tree image. - Side effects: - Description: use $node->set_link($url) to set the link to $url. - -=cut - -sub get_html_image_map { -# print STDERR "PNG_tree_renderer: in: get_html_image_map()\n"; - my $self=shift; - my $name =shift; - my ($whole_temp, $hilite_temp, $align_type) = @_; - my $map = $self->recursive_image_map_coords($self->get_tree()->get_root()); - my $tree = $self->get_tree(); - - #Image Map portion for Alignment, if align temp file provided: - if($tree->get_alignment && $whole_temp){ - $hilite_temp = $whole_temp unless $hilite_temp; - foreach my $l ($tree->get_leaf_list){ - my $m = $l->get_alignment_member(); - next unless $m; - my $temp = ""; - my $title = ""; - ($l->get_hilited)?($temp = $hilite_temp):($temp = $whole_temp); - ($l->get_hilited)?($title = "See alignment for highlighted region only"):($title = "See alignment for all visible members"); - - ($temp) = $temp =~ /([^\/]+)$/; - my $coords = join ",", ($m->get_enclosing_rect); - my $url = "/tools/align_viewer/show_align.pl?temp_file=$temp&type=$align_type&title=Selection%20From%20Tree"; - my $target = "_SGN_ALIGN_" . int(rand(999999999)); - $map .= "\n"; - } - } - foreach my $bl_label (@{$self->{bl_labels}}){ - next unless $bl_label->get_tooltip(); - my $coords = join ",", ($bl_label->get_enclosing_rect); - #print STDERR "--------RENDERED.PM------". $bl_label->get_onmouseover()."------"; - $map .= "\n"; - } - - my $maptag = qq( \n$map\n ); - return $maptag; -} - -sub recursive_image_map_coords { - my $self = shift; - my $node =shift; - - my $tooltip = $node->get_tooltip(); - $tooltip ||= "Node " . $node->get_node_key() . ": " . $node->get_label()->get_name(); - #print STDERR "RECURSIVE_IMAGE_MAP_COORDS\n"; - my $map = do { no warnings 'uninitialized'; - "get_name()."\" coords=\"" - . int(($node->get_X())-3) . ",".int(($node->get_Y())-3).",".int(($node->get_X())+3).",".int(($node->get_Y())+3) - . "\" href=\"".$node->get_link() - ."\" title=\"$tooltip\" onmouseover=\"".$node->get_onmouseover()."\" onmouseout=\"".$node->get_onmouseout()."\" alt=\"$tooltip\" />\n"}; - - $map .= $node->get_label()->get_html_image_map(); - my @children = $node->get_children(); - foreach my $c (@children) { - #print STDERR "checking out the children...\n"; - $map .= $self->recursive_image_map_coords($c); - } - return $map; -} - - - -package CXGN::Phylo::PNG_angle_tree_renderer; - -use GD; - -use base qw/ CXGN::Phylo::PNG_tree_renderer /; - -sub new { - my $class = shift; - my $self = $class -> SUPER::new(@_); - return $self; -} - -sub connect_nodes { - my $self = shift; - my $p_node= shift; - my $c_node = shift; - my $image = shift; - my $color = shift; - - - if (!$p_node) { - $p_node=$c_node->copy(); - $p_node->set_X($self->get_tree()->get_layout()->get_left_margin()); - } - $self->display_branch_length($image, $p_node, $c_node, $color); - $image->setAntiAliased($color); - $image->line ($p_node->get_X(), - $p_node->get_Y(), - $c_node->get_X(), - $c_node->get_Y(),gdAntiAliased); - - - -} - -sub get_blen_ref_pt { - my ($self, $p_node, $c_node, $text) = @_; - - my ($dx, $dy) = ($c_node->get_X() - $p_node->get_X(), abs($c_node->get_Y()-$p_node->get_Y())); - my $angle = atan2($dy, $dx); - my $pi = 3.14159265359; - my $max = $pi/2; - my $halfw = (length($text)*$self->get_font()->width)/3; - my $x_shift = -$halfw*($angle)/$max; - - my $flip = 1; - $flip = -1 if ($c_node->get_Y() > $p_node->get_Y()); - - -my $bl_scaled = ($c_node->get_transformed_branch_length($self->get_tree()->get_shown_branch_length_transformation()) * $self->get_tree()->get_layout()->get_horizontal_scaling_factor()); - - return ( - $c_node->get_X() - $bl_scaled/2 + $x_shift, - ($c_node->get_Y()+$p_node->get_Y())/2 - $flip*$self->get_font_height()/2 - ); -} - -package CXGN::Phylo::PNG_round_tree_renderer; -use GD; -use base qw/ CXGN::Phylo::PNG_tree_renderer /; -sub new { - my $class = shift; - my $self = $class -> SUPER::new(@_); - return $self; -} - -sub connect_nodes { - my $self = shift; - my $p_node= shift; - my $c_node = shift; - my $image = shift; - my $color = shift; - - - if (!$p_node) { - $p_node=$c_node->copy(); - $p_node->set_X($self->get_tree()->get_layout()->get_left_margin()); - } - - $self->display_branch_length($image, $p_node, $c_node, $color); - - $image->setAntiAliased($color); - $image -> arc($c_node->get_X(), - $p_node->get_Y(), - ($c_node->get_X()-$p_node->get_X())*2, - ($p_node->get_Y()-$c_node->get_Y())*2, - 180, - 270, - gdAntiAliased - ); -} - -sub get_blen_ref_pt { - my ($self, $p_node, $c_node, $text) = @_; - my ($dx, $dy) = ($c_node->get_X() - $p_node->get_X(), abs($c_node->get_Y()-$p_node->get_Y())); - my $angle = atan2($dy, $dx); - my $pi = 3.14159265359; - my $max = $pi/2; - my $halfw = (length($text)*$self->get_font()->width)/2; - my $x_shift = -$halfw*($angle)/$max; - - my $flip = 1; - $flip = -1 if ($c_node->get_Y() > $p_node->get_Y()); - - #Try putting at "center of mass" of right triangle: - return ( $p_node->get_X()+($c_node->get_X() - $p_node->get_X())/3 +$x_shift, - $c_node->get_Y()-($c_node->get_Y() - $p_node->get_Y())/3 - $flip*($self->get_font()->height()/2) ); -} - -1; + + # generate the color for the clickable area of the nodes + + # set node_color to default color for normal (non-hilited) nodes + my $node_color = $image->colorResolve( 150, 150, 250 ); + + # get the hilite color for the hilited nodes + # +# print STDERR "name: ", $node->get_name(), " node implicit names:[", +# join( ":", @{ $node->get_implicit_names() } ), "] speciation:[", +# $node->get_attribute("speciation"), "] hilite:[", $node->get_hilited(), +# "]\n"; + + # if node's subtree has at least one species in species tree ... + if ( $node->get_hilited() + and ( $node->get_attribute('species_bit_pattern') != 0 ) ) + { #or $node->get_attribute("speciation") ) { + my @hcolor = $self->get_tree()->get_hilite_color(); + + # this is the (dull red) color of the hilited subtree: + $color = $image->colorResolve(@hcolor); + + # if ( # does this: '(!$parent_hilited) or' do anything useful? + if ( $node->get_attribute("speciation") ) { # Saturate the predominant color component(s) + my $max = List::Util::max(@hcolor); + foreach (@hcolor) { + $_ = ( $_ >= $max ) ? 255 : $_ * 0.6; + } + + $node_color = $image->colorResolve(@hcolor); # color of speciation nodes (bright red). + } else { + $node_color = $color; # color of rest of hilited subtree (dull red). + } + } + + if (0) { # is this needed for anything??? + if ( $node->is_root() ) { + my @line_color = $node->get_line_color(); + + # print STDERR "line color: ", join(";", @line_color), "\n"; + $line_color = $image->colorResolve(@line_color) + if ( @line_color == 3 ); + my $model = $node->get_attribute("model"); + if ($model) { + my $color_array = $self->get_model_color($model); + $line_color = $image->colorResolve(@$color_array) + if ( ref($color_array) eq "ARRAY" ); + } + $line_color = $color if $node->get_hilited(); + $self->connect_nodes( undef, $node, $image, $line_color ); # what does this do (with undef as arg)? + } + } + + unless ( $node->get_hidden() ) { # (don't draw children of hidden nodes) + foreach my $c (@children) { + my @line_color = $c->get_line_color(); + + # @line_color = (0,200,0); + $line_color = $image->colorResolve(@line_color) + if ( @line_color == 3 ); + my $model = $c->get_attribute("model"); + if ($model) { + my $color_array = $self->get_model_color($model); + $line_color = $image->colorResolve(@$color_array) + if ( ref($color_array) eq "ARRAY" ); + } + $line_color = $color if $node->get_hilited(); + + # print STDERR "line color: ", join(";", @line_color), "\n"; + $self->connect_nodes( $node, $c, $image, $line_color ); + $self->recursive_draw( $c, $image, $color, $print_all_labels ); + } + } + + # image is circle for leaf, square otherwise: + if ( !( $node->is_leaf() || ( $node->is_hidden() ) ) ) { + $image->filledRectangle( + ( $node->get_X() - 2 ), + ( $node->get_Y() - 2 ), + ( $node->get_X() + 2 ), + ( $node->get_Y + 2 ), + $node_color + ); + } elsif ( $node->is_leaf() ) { + $image->filledArc( ( $node->get_X() ), ( $node->get_Y() ), 7, 7, 0, 360, $node_color ); + } + + # if the node is hidden, draw the hidden symbol + # + if ( $node->get_hidden() ) { + $image->rectangle( + ( $node->get_X() - 3 ), + ( $node->get_Y() - 3 ), + ( $node->get_X() + 3 ), + ( $node->get_Y() + 3 ), + $color + ); + + $image->line( ( $node->get_X() ), ( $node->get_Y() - 2 ), ( $node->get_X() ), ( $node->get_Y() + 2 ), $color ); + $image->line( ( $node->get_X() - 2 ), ( $node->get_Y() ), ( $node->get_X() + 2 ), ( $node->get_Y() ), $color ); + } + + # draw the label if it it visible + # + if ( ( !$node->get_hide_label() and $node->is_leaf() ) + || $print_all_labels ) + { + $node->get_label()->set_reference_point( ( $node->get_X() ), ( $node->get_Y() ) ); + if ( !$node->is_leaf() && $node->get_children() % 2 > 0 ) { + $node->get_label()->set_reference_point( + ( $node->get_X() ), + ( + $node->get_Y() - 1 - $node->get_label()->get_font()->height / 2 + ) + ); + } + $node->get_label()->set_orientation_horizontal(); + $node->get_label()->render($image); + } +} + +=head2 function connect_nodes() + + Synopsis: + Arguments: a parent node object, a child node object, a GD image object and a GD color + Returns: + Side effects: draws the connection between a parent and a child node. + Description: the default style is a connection that is broken into perpendicular + lines. This can be overridden in subclasses to draw different + styles of trees + +=cut + +sub connect_nodes { + my $self = shift; + my $p_node = shift; + my $c_node = shift; + my $image = shift; + my $color = shift; + + if ( !$p_node ) { + $p_node = $c_node->copy(); + $p_node->set_X( $self->get_tree()->get_layout()->get_left_margin() ); + } + $self->display_branch_length( $image, $p_node, $c_node, $color ); + $image->setAntiAliased($color); + $image->line( $p_node->get_X(), $p_node->get_Y(), $p_node->get_X(), $c_node->get_Y(), $color ); + + $image->line( $p_node->get_X(), $c_node->get_Y(), $c_node->get_X(), $c_node->get_Y(), $color ); + +} + +sub display_branch_length { + my $self = shift; + + # if we are supposed to display the branch lengths, we do so... + return unless $self->get_show_branch_length(); + + my $image = shift; + my $p_node = shift; + my $c_node = shift; + my $color = shift; + + my $label_text = $c_node->get_branch_length(); + my $branch_length_label = CXGN::Phylo::Label->new($label_text); + + my @color = ( 0, 0, 0 ); #default color for branch length text. + my $max_blc_sum = 200 + ; # the branch length color will be the branch color normalized s.t. sum of r,g and b <= max_blc_sum; so make this smaller if branch lengths are too light to read easily + if ($color) { + @color = $image->rgb($color); + my $blc_sum = ( $color[0] + $color[1] + $color[2] ); + my $norm_factor = ( $blc_sum == 0 ) ? 1 : $max_blc_sum / $blc_sum; + $norm_factor = 1 + if ( $norm_factor > 1 ); # can make it darker but not lighter + @color = map { $_ * $norm_factor } @color; # normalize branch color. + + my $model = $c_node->get_attribute("model"); + if ( $model =~ /\w+/ ) { + $branch_length_label->set_tooltip( "Calculated by " . ucfirst($model) . " model" ); + } + } + + $branch_length_label->set_reference_point( $self->get_blen_ref_pt( $p_node, $c_node, $label_text ) ); + $branch_length_label->set_font( GD::Font->Tiny() ); + $branch_length_label->set_hidden(0); + $branch_length_label->align_center(); + $branch_length_label->set_text_color(@color); + $branch_length_label->render($image); + + push( @{ $self->{bl_labels} }, $branch_length_label ); +} + +sub get_blen_ref_pt { + my $self = shift; + my $p_node = shift; + my $c_node = shift; + my $bl_scaled = + ( $c_node->get_transformed_branch_length( $self->get_tree()->get_shown_branch_length_transformation() ) * + $self->get_tree()->get_layout()->get_horizontal_scaling_factor() ); + return ( $c_node->get_X() - $bl_scaled / 2, $c_node->get_Y() - $self->get_font_height() / 2 ); +} + +sub hilite_model { + my $self = shift; + my $model = shift; + my $color_array = shift; + $self->{model_colors}->{$model} = $color_array; +} + +sub get_model_color { + my $self = shift; + my $model = shift; + my $color_array = $self->{model_colors}->{$model}; + return unless ( ref($color_array) eq "ARRAY" ); + return $color_array if ( @$color_array == 3 ); +} + +=head2 function get_html_image_map() + + Synopsis: my $image_map=$renderer->get_html_image_map($name, $temp_filename, $hilite_temp, $align_type ); + Arguments: image_map name, filename + Returns: a html imagemap that can be used to embed links into the tree image. + Side effects: + Description: use $node->set_link($url) to set the link to $url. + +=cut + +sub get_html_image_map { + + # print STDERR "PNG_tree_renderer: in: get_html_image_map()\n"; + my $self = shift; + my $name = shift; + my ( $whole_temp, $hilite_temp, $align_type ) = @_; + my $map = $self->recursive_image_map_coords( $self->get_tree()->get_root() ); + my $tree = $self->get_tree(); + + #Image Map portion for Alignment, if align temp file provided: + if ( $tree->get_alignment && $whole_temp ) { + $hilite_temp = $whole_temp unless $hilite_temp; + foreach my $l ( $tree->get_leaf_list ) { + my $m = $l->get_alignment_member(); + next unless $m; + my $temp = ""; + my $title = ""; + ( $l->get_hilited ) + ? ( $temp = $hilite_temp ) + : ( $temp = $whole_temp ); + ( $l->get_hilited ) + ? ( $title = "See alignment for highlighted region only" ) + : ( $title = "See alignment for all visible members" ); + + ($temp) = $temp =~ /([^\/]+)$/; + my $coords = join ",", ( $m->get_enclosing_rect ); + my $url = + "/tools/align_viewer/show_align.pl?temp_file=$temp&type=$align_type&title=Selection%20From%20Tree"; + my $target = "_SGN_ALIGN_" . int( rand(999999999) ); + $map .= "\n"; + } + } + foreach my $bl_label ( @{ $self->{bl_labels} } ) { + next unless $bl_label->get_tooltip(); + my $coords = join ",", ( $bl_label->get_enclosing_rect ); + + #print STDERR "--------RENDERED.PM------". $bl_label->get_onmouseover()."------"; + $map .= "\n"; + } + + my $maptag = qq( \n$map\n ); + return $maptag; +} + +sub recursive_image_map_coords { + my $self = shift; + my $node = shift; + + my $tooltip = $node->get_tooltip(); + $tooltip ||= "Node " . $node->get_node_key() . ": " . $node->get_label()->get_name(); + + #print STDERR "RECURSIVE_IMAGE_MAP_COORDS\n"; + my $map = do { + no warnings 'uninitialized'; + "get_name() + . "\" coords=\"" + . int( ( $node->get_X() ) - 3 ) . "," + . int( ( $node->get_Y() ) - 3 ) . "," + . int( ( $node->get_X() ) + 3 ) . "," + . int( ( $node->get_Y() ) + 3 ) + . "\" href=\"" + . $node->get_link() + . "\" title=\"$tooltip\" onmouseover=\"" + . $node->get_onmouseover() + . "\" onmouseout=\"" + . $node->get_onmouseout() + . "\" alt=\"$tooltip\" />\n"; + }; + + $map .= $node->get_label()->get_html_image_map(); + my @children = $node->get_children(); + foreach my $c (@children) { + + #print STDERR "checking out the children...\n"; + $map .= $self->recursive_image_map_coords($c); + } + return $map; +} + +package CXGN::Phylo::PNG_angle_tree_renderer; + +use GD; + +use base qw/ CXGN::Phylo::PNG_tree_renderer /; + +sub new { + my $class = shift; + my $self = $class->SUPER::new(@_); + return $self; +} + +sub connect_nodes { + my $self = shift; + my $p_node = shift; + my $c_node = shift; + my $image = shift; + my $color = shift; + + if ( !$p_node ) { + $p_node = $c_node->copy(); + $p_node->set_X( $self->get_tree()->get_layout()->get_left_margin() ); + } + $self->display_branch_length( $image, $p_node, $c_node, $color ); + $image->setAntiAliased($color); + $image->line( $p_node->get_X(), $p_node->get_Y(), $c_node->get_X(), $c_node->get_Y(), gdAntiAliased ); + +} + +sub get_blen_ref_pt { + my ( $self, $p_node, $c_node, $text ) = @_; + + my ( $dx, $dy ) = ( $c_node->get_X() - $p_node->get_X(), abs( $c_node->get_Y() - $p_node->get_Y() ) ); + my $angle = atan2( $dy, $dx ); + my $pi = 3.14159265359; + my $max = $pi / 2; + my $halfw = ( length($text) * $self->get_font()->width ) / 3; + my $x_shift = -$halfw * ($angle) / $max; + + my $flip = 1; + $flip = -1 if ( $c_node->get_Y() > $p_node->get_Y() ); + + my $bl_scaled = + ( $c_node->get_transformed_branch_length( $self->get_tree()->get_shown_branch_length_transformation() ) * + $self->get_tree()->get_layout()->get_horizontal_scaling_factor() ); + + return ( $c_node->get_X() - $bl_scaled / 2 + $x_shift, + ( $c_node->get_Y() + $p_node->get_Y() ) / 2 - $flip * $self->get_font_height() / 2 ); +} + +package CXGN::Phylo::PNG_round_tree_renderer; +use GD; +use base qw/ CXGN::Phylo::PNG_tree_renderer /; + +sub new { + my $class = shift; + my $self = $class->SUPER::new(@_); + return $self; +} + +sub connect_nodes { + my $self = shift; + my $p_node = shift; + my $c_node = shift; + my $image = shift; + my $color = shift; + + if ( !$p_node ) { + $p_node = $c_node->copy(); + $p_node->set_X( $self->get_tree()->get_layout()->get_left_margin() ); + } + + $self->display_branch_length( $image, $p_node, $c_node, $color ); + + $image->setAntiAliased($color); + $image->arc( $c_node->get_X(), $p_node->get_Y(), + ( $c_node->get_X() - $p_node->get_X() ) * 2, + ( $p_node->get_Y() - $c_node->get_Y() ) * 2, + 180, 270, gdAntiAliased ); +} + +sub get_blen_ref_pt { + my ( $self, $p_node, $c_node, $text ) = @_; + my ( $dx, $dy ) = ( $c_node->get_X() - $p_node->get_X(), abs( $c_node->get_Y() - $p_node->get_Y() ) ); + my $angle = atan2( $dy, $dx ); + my $pi = 3.14159265359; + my $max = $pi / 2; + my $halfw = ( length($text) * $self->get_font()->width ) / 2; + my $x_shift = -$halfw * ($angle) / $max; + + my $flip = 1; + $flip = -1 if ( $c_node->get_Y() > $p_node->get_Y() ); + + #Try putting at "center of mass" of right triangle: + return ( $p_node->get_X() + ( $c_node->get_X() - $p_node->get_X() ) / 3 + $x_shift, + $c_node->get_Y() - ( $c_node->get_Y() - $p_node->get_Y() ) / 3 - $flip * ( $self->get_font()->height() / 2 ) ); +} + +1; diff --git a/lib/CXGN/Phylo/Species_name_map.pm b/lib/CXGN/Phylo/Species_name_map.pm index a4ccd81e..248ad490 100644 --- a/lib/CXGN/Phylo/Species_name_map.pm +++ b/lib/CXGN/Phylo/Species_name_map.pm @@ -90,7 +90,7 @@ sub new{ 'Arabidopsis_lyrata', 'Arachis_hypogaea', 'Cajanus_cajan', - 'Lotus_japonica', + 'Lotus_japonicus', 'Cannabis_sativa', 'Malus_domestica', 'Prunus_persica', @@ -151,7 +151,7 @@ sub new{ $self->set_standard_name('peach', 'Prunus_persica'); $self->set_standard_name('peanut', 'Arachis_hypogaea'); $self->set_standard_name('pigeon_pea', 'Cajanus_cajan'); - $self->set_standard_name('lotus', 'Lotus_japonica'); + $self->set_standard_name('lotus', 'Lotus_japonicus'); $self->set_standard_name('apple', 'Malus_domestica'); $self->set_standard_name('cannabis', 'Cannabis_sativa'); $self->set_standard_name('hemp', 'Cannabis_sativa'); diff --git a/lib/CXGN/Phylo/TLYstuff/utils.pm b/lib/CXGN/Phylo/TLYstuff/utils.pm deleted file mode 100644 index 0ccfeca7..00000000 --- a/lib/CXGN/Phylo/TLYstuff/utils.pm +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -package Utils; - -# read fasta from file with file handle $filehandle -# # read $max_sequences sequences at concatenate to $chunk_sequence -# # unless hit eof -# # return the chunk sequence, first line of next chunk (which is undef if no next chunk) -# chunk_sequences keeps the newlines as in input - - -sub get_next_chunk{ - my $filehandle = shift; - my $max_sequences = shift; - my $chunk_sequence = shift; - $chunk_sequence ||= ""; - my $seqs_in_chunk = ($chunk_sequence eq "")? 0: 1; - - while (<$filehandle>) { - if (/^\s*>/) { # next id line - $seqs_in_chunk++; - if ($seqs_in_chunk > $max_sequences) { # this id belongs to the next chunk - return ($chunk_sequence, $_); - } - } - $chunk_sequence .= $_; - } - return ($chunk_sequence, undef); -} - - - -1; - diff --git a/lib/CXGN/Phylo/Tree.pm b/lib/CXGN/Phylo/Tree.pm index 124a6354..b044d22e 100644 --- a/lib/CXGN/Phylo/Tree.pm +++ b/lib/CXGN/Phylo/Tree.pm @@ -34,11 +34,14 @@ The tree object also provides the layout and rendering functions. The both layou use strict; use warnings; +use Math::BigInt; + use CXGN::Phylo::Node; use CXGN::Phylo::Species_name_map; use CXGN::Phylo::Layout; use CXGN::Phylo::Renderer; use CXGN::Phylo::Parser; +use CXGN::Phylo::IdTaxonMap; use base qw | CXGN::DB::Object |; @@ -55,116 +58,122 @@ my $do_parse_set_error = 0; =cut sub new { - my $class = shift; - my $self = bless {}, $class; - #You can feed constructor with a newick string, which will create - #a parser object that creates a tree object *without* passing a - #string, which would lead to an infinite loop. Watch out! - my $arg = shift; - my $newick_string = ""; - unless (ref($arg)) { - # print STDERR "Tree::new. [$newick_string] \n"; - $newick_string = $arg; -# print STDERR "Tree::new. [$newick_string] \n"; - } else { - my $newick_file = ''; - if ($arg->{from_files}) { - $newick_file = $arg->{from_files}->{newick}; - die "Need a newick file if 'from_files' is used\n" unless -f $newick_file; - - $self = _tree_from_file($newick_file); - my $alignment_file = $arg->{from_files}->{alignment}; - if ($alignment_file) { - die "Alignment file: $alignment_file not found" unless -f $alignment_file; - my $alignment = CXGN::Phylo::Alignment->new( from_file=>$alignment_file); - $self->set_alignment($alignment); - $self->standard_alignment_leaf_association(); - } - return $self; - } elsif ($arg->{from_file}) { - $newick_file = $arg->{from_file}; - $self = _tree_from_file($newick_file); - return $self; - } - } - if ($newick_string) { - $newick_string =~ s/\s//g; - $newick_string =~ s/\n|\r//sg; - if ($newick_string =~ /^\(.*\)|;$/) { # start with (, end with ) or ; - # print STDERR "in Tree::new, about to parse the newick_string \n"; - my $parser = CXGN::Phylo::Parse_newick->new($newick_string, $do_parse_set_error); - print "calling parse in Tree constructor\n"; - my $self = $parser->parse(); - return $self; - } elsif ($newick_string) { - print STDERR "String passed not recognized as newick\n"; - } - } - - ############################################################## - #$self is a new tree, not predefined by newick; instead it will be - #constructed by methods on this object and Phylo::Node's - - #print STDERR "constructing Tree not predefined by a newick\n"; - - $self->set_unique_node_key(0); - - # initialize the root node - # - my $root = CXGN::Phylo::Node->new(); - $root->set_name("."); - $root->set_tree($self); - $root->set_node_key($self->get_unique_node_key()); - $self->add_node_hash($root, $root->get_node_key()); - $self->set_root($root); - - # initialize some imaging parameters - # - $self->set_show_labels(1); - $self->set_hilite_color(255, 0 ,0); - $self->set_line_color(100, 100, 100); - $self->set_bgcolor(0, 0, 0); - $self->set_show_species_in_label(0); - $self->set_show_standard_species(0); - $self->set_species_standardizer(CXGN::Phylo::Species_name_map->new()); - - #Attribute names to show in newick extended format - $self->{newick_shown_attributes} = {}; - $self->{shown_branch_length_transformation} = "branch_length"; # other possibilities: "proportion_different", equal - $self->{min_shown_branch_length} = 0.001; # when showing branches graphically, this is added to the displayed length - $self->set_min_branch_length(0.0001); - # initialize a default layout and renderer - # - $self->set_layout( CXGN::Phylo::Layout->new($self) ); - $self->set_renderer( CXGN::Phylo::PNG_tree_renderer->new($self) ); - - return $self; + my $class = shift; + my $self = bless {}, $class; + + #You can feed constructor with a newick string, which will create + #a parser object that creates a tree object *without* passing a + #string, which would lead to an infinite loop. Watch out! + my $arg = shift; + my $newick_string = ""; + unless ( ref($arg) ) { + + # print STDERR "Tree::new. [$newick_string] \n"; + $newick_string = $arg; + + # print STDERR "Tree::new. [$newick_string] \n"; + } else { + my $newick_file = ''; + if ( $arg->{from_files} ) { + $newick_file = $arg->{from_files}->{newick}; + die "Need a newick file if 'from_files' is used\n" unless -f $newick_file; + + $self = _tree_from_file($newick_file); + my $alignment_file = $arg->{from_files}->{alignment}; + if ($alignment_file) { + die "Alignment file: $alignment_file not found" unless -f $alignment_file; + my $alignment = CXGN::Phylo::Alignment->new( from_file => $alignment_file ); + $self->set_alignment($alignment); + $self->standard_alignment_leaf_association(); + } + return $self; + } elsif ( $arg->{from_file} ) { + $newick_file = $arg->{from_file}; + $self = _tree_from_file($newick_file); + return $self; + } + } + if ($newick_string) { + $newick_string =~ s/\s//g; + $newick_string =~ s/\n|\r//sg; + if ( $newick_string =~ /^\(.*\)|;$/ ) { # start with (, end with ) or ; + # print STDERR "in Tree::new, about to parse the newick_string \n"; + my $parser = CXGN::Phylo::Parse_newick->new( $newick_string, $do_parse_set_error ); + print "calling parse in Tree constructor\n"; + my $self = $parser->parse(); + return $self; + } elsif ($newick_string) { + print STDERR "String passed not recognized as newick\n"; + } + } + + ############################################################## + #$self is a new tree, not predefined by newick; instead it will be + #constructed by methods on this object and Phylo::Node's + + #print STDERR "constructing Tree not predefined by a newick\n"; + + $self->set_unique_node_key(0); + + # initialize the root node + # + my $root = CXGN::Phylo::Node->new(); + $root->set_name("."); + $root->set_tree($self); + $root->set_node_key( $self->get_unique_node_key() ); + $self->add_node_hash( $root, $root->get_node_key() ); + $self->set_root($root); + + # initialize some imaging parameters + # + $self->set_show_labels(1); + $self->set_hilite_color( 255, 0, 0 ); + $self->set_line_color( 100, 100, 100 ); + $self->set_bgcolor( 0, 0, 0 ); + $self->set_show_species_in_label(0); + $self->set_show_standard_species(0); + $self->set_species_standardizer( CXGN::Phylo::Species_name_map->new() ); + $self->set_id_taxon_map( CXGN::Phylo::IdTaxonMap->new() ); + + #Attribute names to show in newick extended format + $self->{newick_shown_attributes} = {}; + $self->{shown_branch_length_transformation} = "branch_length"; # other possibilities: "proportion_different", equal + $self->{min_shown_branch_length} = 0.001; # when showing branches graphically, this is added to the displayed length + $self->set_min_branch_length(0.0001); + + # initialize a default layout and renderer + # + $self->set_layout( CXGN::Phylo::Layout->new($self) ); + $self->set_renderer( CXGN::Phylo::PNG_tree_renderer->new($self) ); + + return $self; } # copy some of the tree's fields. Other fields will just have default values as set in constructor # e.g. layout and renderer aren't copied because there is no copy method for these objects -sub copy_tree_fields{ - my $self = shift; # source - my $new = shift; # copy +sub copy_tree_fields { + my $self = shift; # source + my $new = shift; # copy - $new->set_name($self->get_name()); + $new->set_name( $self->get_name() ); - # initialize some imaging parameters - # - $new->set_show_labels($self->get_show_labels()); - $new->set_hilite_color($self->get_hilite_color()); - $new->set_line_color($self->get_line_color()); - $new->set_bgcolor($self->set_bgcolor()); + # initialize some imaging parameters + # + $new->set_show_labels( $self->get_show_labels() ); + $new->set_hilite_color( $self->get_hilite_color() ); + $new->set_line_color( $self->get_line_color() ); + $new->set_bgcolor( $self->set_bgcolor() ); - $new->set_species_standardizer($self->get_species_standardizer()->copy()) if (defined $self->get_species_standardizer()) ; - $new->set_show_species_in_label($self->get_show_species_in_label()); - $new->set_show_standard_species($self->get_show_standard_species()); + $new->set_species_standardizer( $self->get_species_standardizer()->copy() ) + if ( defined $self->get_species_standardizer() ); + $new->set_show_species_in_label( $self->get_show_species_in_label() ); + $new->set_show_standard_species( $self->get_show_standard_species() ); + + #Attribute names to show in newick extended format + # $new->{newick_shown_attributes} = $self->{newick_shown_attributes}; + # @{$new->{newick_shown_attributes}} = @{$self->{newick_shown_attributes}}; + %{ $new->{newick_shown_attributes} } = %{ $self->{newick_shown_attributes} }; - #Attribute names to show in newick extended format -# $new->{newick_shown_attributes} = $self->{newick_shown_attributes}; - # @{$new->{newick_shown_attributes}} = @{$self->{newick_shown_attributes}}; - %{$new->{newick_shown_attributes}} = %{$self->{newick_shown_attributes}}; - } =head2 function copy() @@ -178,60 +187,58 @@ sub copy_tree_fields{ =cut sub copy { - my $self = shift; - my $new = $self->get_root()->copy_subtree(); - $new->update_label_names(); - return $new; + my $self = shift; + my $new = $self->get_root()->copy_subtree(); + $new->update_label_names(); + return $new; } - sub _tree_from_file { - my $file = shift; - my $tree = _tree_from_newick(_newick_from_file($file)); - $tree->standard_layout(); - return $tree; + my $file = shift; + my $tree = _tree_from_newick( _newick_from_file($file) ); + $tree->standard_layout(); + return $tree; } sub _tree_from_newick { - my $newick_string = shift; - $newick_string =~ s/\s//g; # remove whitespace from newick string - $newick_string =~ s/\n|\r//g; - if($newick_string =~ /^\(.*\)|;$/){ - my $parser = CXGN::Phylo::Parse_newick->new($newick_string, $do_parse_set_error); - print "parsing tree in Tree::_tree_from_newick\n"; - my $tree = $parser->parse(); - return $tree; - } - elsif($newick_string) { - print STDERR "String passed not recognized as newick\n"; - return undef; - } + my $newick_string = shift; + $newick_string =~ s/\s//g; # remove whitespace from newick string + $newick_string =~ s/\n|\r//g; + if ( $newick_string =~ /^\(.*\)|;$/ ) { + my $parser = CXGN::Phylo::Parse_newick->new( $newick_string, $do_parse_set_error ); + print "parsing tree in Tree::_tree_from_newick\n"; + my $tree = $parser->parse(); + return $tree; + } elsif ($newick_string) { + print STDERR "String passed not recognized as newick\n"; + return undef; + } } sub _newick_from_file { - my $file = shift; - open(FH, $file) or die "Can't open file: $file\n"; - my $newick = ""; - $newick .= $_ while (); - close FH; - $newick =~ s/\n//sg; - $newick =~ s/\r//sg; - $newick =~ s/\s//g; - return $newick; + my $file = shift; + open( FH, $file ) or die "Can't open file: $file\n"; + my $newick = ""; + $newick .= $_ while (); + close FH; + $newick =~ s/\n//sg; + $newick =~ s/\r//sg; + $newick =~ s/\s//g; + return $newick; } sub get_alignment { - my $self = shift; - return $self->{alignment}; + my $self = shift; + return $self->{alignment}; } sub set_alignment { - my $self = shift; - $self->{alignment} = shift; - unless(@{$self->{alignment}->{members}}){ - warn "The alignment set to the tree has no members. You must construct the alignment before setting it here"; - return -1; - } + my $self = shift; + $self->{alignment} = shift; + unless ( @{ $self->{alignment}->{members} } ) { + warn "The alignment set to the tree has no members. You must construct the alignment before setting it here"; + return -1; + } } =head2 function standard_alignment_leaf_association() @@ -242,18 +249,18 @@ sub set_alignment { =cut sub standard_alignment_leaf_association { - my $self = shift; - my $alignment = $self->get_alignment(); - return unless $alignment; - my %id2mem = (); - foreach my $m ($alignment->get_members()) { - $id2mem{$m->get_id()} = $m; - } - foreach my $l ($self->get_leaves()) { - my $m = $id2mem{$l->get_name()}; - next unless $m; - $l->set_alignment_member($m); - } + my $self = shift; + my $alignment = $self->get_alignment(); + return unless $alignment; + my %id2mem = (); + foreach my $m ( $alignment->get_members() ) { + $id2mem{ $m->get_id() } = $m; + } + foreach my $l ( $self->get_leaves() ) { + my $m = $id2mem{ $l->get_name() }; + next unless $m; + $l->set_alignment_member($m); + } } =head2 function get_root() @@ -267,8 +274,8 @@ sub standard_alignment_leaf_association { =cut -sub get_root { - my $self=shift; +sub get_root { + my $self = shift; return $self->{root}; } @@ -291,11 +298,11 @@ sub get_root { =cut sub set_root { - my $self=shift; - my $new_root=shift; - $new_root->set_parent(undef); #is_root must be true + my $self = shift; + my $new_root = shift; + $new_root->set_parent(undef); #is_root must be true $new_root->set_branch_length(undef); - $self->{root}=$new_root; + $self->{root} = $new_root; } =head2 function delete_node() and del_node() @@ -314,26 +321,27 @@ sub set_root { =cut sub delete_node { - my $self = shift; - my $node_key = shift; + my $self = shift; + my $node_key = shift; - # get the node object from the key - # - my $node=$self->get_node($node_key); - return $self->del_node($node); + # get the node object from the key + # + my $node = $self->get_node($node_key); + return $self->del_node($node); } # delete node by passing node object as argument # rather than node key as with delete_node -sub del_node{ - my $self = shift; - my $node = shift; - if (!$node) { - warn 'The node you want to delete does not exist!'; return; - } - my $retval = $node->delete_self(); - $self->recalculate_tree_data(); - return $retval; +sub del_node { + my $self = shift; + my $node = shift; + if ( !$node ) { + warn 'The node you want to delete does not exist!'; + return; + } + my $retval = $node->delete_self(); + $self->recalculate_tree_data(); + return $retval; } =head2 function recalculate_tree_data() @@ -347,11 +355,11 @@ sub del_node{ =cut -sub recalculate_tree_data { +sub recalculate_tree_data { my $self = shift; $self->calculate_leaf_list(); $self->clear_node_hash(); - $self->regenerate_node_hash($self->get_root()); + $self->regenerate_node_hash( $self->get_root() ); $self->get_root()->calculate_distances_from_root(); $self->get_root()->recursive_clear_properties(); } @@ -368,8 +376,8 @@ sub recalculate_tree_data { =cut -sub prune_to_subtree { - my $self = shift; +sub prune_to_subtree { + my $self = shift; my $new_root_node = shift; $self->set_root($new_root_node); @@ -382,7 +390,7 @@ sub prune_to_subtree { #=cut -#sub sub_branch { +#sub sub_branch { # if (0) { # my $self = shift; # my $new_root_node = shift; @@ -394,7 +402,6 @@ sub prune_to_subtree { # } #} - =head2 function reset_root() Synopsis: $tree->reset_root($node); @@ -410,41 +417,42 @@ sub prune_to_subtree { =cut sub reset_root { - my $self = shift; # tree object - my $new_root_node = shift; # node object - - if (0) { #either of these branches should work. - my @parents = $new_root_node->get_all_parents(); # parent, grandparent, etc. up to & including root - $new_root_node->set_parent(undef); # because it is to be the root - my $pc_blen = $new_root_node->get_branch_length(); # branch length between $pc and $cp - my $cp=$new_root_node; - foreach my $pc (@parents) { - my $former_p_blen = $pc->get_branch_length(); - $pc->remove_child($cp); # removes $cp from $pc's child list - $cp->add_child_node($pc); # adds $pc as child of $cp, and set $pc's parent to $cp - $pc->set_branch_length($pc_blen); - $cp = $pc; - $pc_blen = $former_p_blen; - } - } else { - my @parents_root_down = reverse $new_root_node->get_all_parents(); - push @parents_root_down, $new_root_node; # need to include the new root in the array - my $pc = shift @parents_root_down; # pc means goes from being parent to being child - - for (my $cp = shift @parents_root_down; defined $cp; $cp = shift @parents_root_down) { - my $blen = $cp->get_branch_length(); - $pc->remove_child($cp); # remove $cp from children list of $pc - $cp->set_parent(undef); - $cp->add_child_node($pc); # now $cp is parent, $pc the child - $pc->set_branch_length($blen); - $pc = $cp; - # at this point we still have a consistent tree, but with the root moved another step along the - # path from original root to new root. - } - } - $self->set_root($new_root_node); - $new_root_node->set_branch_length(0); - $self->recalculate_tree_data(); + my $self = shift; # tree object + my $new_root_node = shift; # node object + + if (0) { #either of these branches should work. + my @parents = $new_root_node->get_all_parents(); # parent, grandparent, etc. up to & including root + $new_root_node->set_parent(undef); # because it is to be the root + my $pc_blen = $new_root_node->get_branch_length(); # branch length between $pc and $cp + my $cp = $new_root_node; + foreach my $pc (@parents) { + my $former_p_blen = $pc->get_branch_length(); + $pc->remove_child($cp); # removes $cp from $pc's child list + $cp->add_child_node($pc); # adds $pc as child of $cp, and set $pc's parent to $cp + $pc->set_branch_length($pc_blen); + $cp = $pc; + $pc_blen = $former_p_blen; + } + } else { + my @parents_root_down = reverse $new_root_node->get_all_parents(); + push @parents_root_down, $new_root_node; # need to include the new root in the array + my $pc = shift @parents_root_down; # pc means goes from being parent to being child + + for ( my $cp = shift @parents_root_down ; defined $cp ; $cp = shift @parents_root_down ) { + my $blen = $cp->get_branch_length(); + $pc->remove_child($cp); # remove $cp from children list of $pc + $cp->set_parent(undef); + $cp->add_child_node($pc); # now $cp is parent, $pc the child + $pc->set_branch_length($blen); + $pc = $cp; + + # at this point we still have a consistent tree, but with the root moved another step along the + # path from original root to new root. + } + } + $self->set_root($new_root_node); + $new_root_node->set_branch_length(0); + $self->recalculate_tree_data(); } =head2 function get_leaf_count() @@ -457,10 +465,11 @@ sub reset_root { =cut -sub get_leaf_count { +sub get_leaf_count { my $self = shift; -# $self->get_root()->count_leaves(); - return scalar $self->get_leaf_list(); + + # $self->get_root()->count_leaves(); + return scalar $self->get_leaf_list(); } =head2 function get_unhidden_leaf_count() @@ -470,8 +479,8 @@ sub get_leaf_count { =cut sub get_unhidden_leaf_count { - my $self = shift; - return scalar grep { !$_->is_hidden } $self->get_leaf_list; + my $self = shift; + return scalar grep { !$_->is_hidden } $self->get_leaf_list; } =head2 function set_unique_node_key() @@ -489,9 +498,9 @@ sub get_unhidden_leaf_count { =cut -sub set_unique_node_key { - my $self = shift; - $self->{unique_node_key}=shift; +sub set_unique_node_key { + my $self = shift; + $self->{unique_node_key} = shift; } =head2 function get_unique_node_key() @@ -509,13 +518,14 @@ sub set_unique_node_key { =cut -sub get_unique_node_key { - my $self = shift; - $self->{unique_node_key}++; # increment the unique node key - while (exists $self->{node_hash}->{$self->{unique_node_key}}) { # if key already in node_hash, increment again... - $self->{unique_node_key}++; - } - return $self->{unique_node_key}; +sub get_unique_node_key { + my $self = shift; + $self->{unique_node_key}++; # increment the unique node key + while ( exists $self->{node_hash}->{ $self->{unique_node_key} } ) + { # if key already in node_hash, increment again... + $self->{unique_node_key}++; + } + return $self->{unique_node_key}; } =head2 function clear_node_hash() @@ -528,9 +538,9 @@ sub get_unique_node_key { =cut -sub clear_node_hash { +sub clear_node_hash { my $self = shift; - %{$self->{node_hash}}=(); + %{ $self->{node_hash} } = (); } =head2 function regenerate_node_hash() @@ -544,18 +554,19 @@ sub clear_node_hash { =cut -sub regenerate_node_hash { +sub regenerate_node_hash { my $self = shift; my $node = shift; - $node ||= $self->get_root(); -#print("in regenerate_node_hash. \n"); -#$node->print_node(); -#print("node key: ", $node->get_node_key()); - $self->add_node_hash($node, $node->get_node_key()); - foreach my $c ($node->get_children()) { - $self->regenerate_node_hash($c); + $node ||= $self->get_root(); + + #print("in regenerate_node_hash. \n"); + #$node->print_node(); + #print("node key: ", $node->get_node_key()); + $self->add_node_hash( $node, $node->get_node_key() ); + foreach my $c ( $node->get_children() ) { + $self->regenerate_node_hash($c); } - $self->set_unique_node_key( scalar $self->get_all_nodes() ); + $self->set_unique_node_key( scalar $self->get_all_nodes() ); } =head2 function add_node_hash() @@ -574,12 +585,12 @@ sub regenerate_node_hash { =cut -sub add_node_hash { - my $self = shift; - my $node = shift; +sub add_node_hash { + my $self = shift; + my $node = shift; my $unique_key = shift; - ${$self->{node_hash}}{$unique_key}=$node; + ${ $self->{node_hash} }{$unique_key} = $node; } =head2 function get_all_nodes() @@ -594,19 +605,19 @@ sub add_node_hash { sub get_all_nodes { my $self = shift; - return (values %{$self->{node_hash}}); + return ( values %{ $self->{node_hash} } ); } sub get_all_node_keys { my $self = shift; - return (keys %{$self->{node_hash}}); + return ( keys %{ $self->{node_hash} } ); } -sub get_node_count { +sub get_node_count { my $self = shift; - return scalar($self->get_all_nodes()); + return scalar( $self->get_all_nodes() ); } =head2 function get_node() @@ -623,28 +634,27 @@ sub get_node_count { =cut -sub get_node { - my $self = shift; - my $key = shift; - return ${$self->{node_hash}}{$key}; -} - -sub print_node_keys{ - my $self = shift; - my $hashref = $self->{node_hash}; - foreach my $k (keys (%$hashref)) { - my $n = $self->get_node($k); - if (defined $n) { - print("key, node: ", $k); $n->print_node(); - } else { - print("key: ", $k, " has undefined node (returned by get_node($k) ). \n"); - } - } -print("present value of unique_node_key: ", $self->{unique_node_key}, "\n"); +sub get_node { + my $self = shift; + my $key = shift; + return ${ $self->{node_hash} }{$key}; +} + +sub print_node_keys { + my $self = shift; + my $hashref = $self->{node_hash}; + foreach my $k ( keys(%$hashref) ) { + my $n = $self->get_node($k); + if ( defined $n ) { + print( "key, node: ", $k ); + $n->print_node(); + } else { + print( "key: ", $k, " has undefined node (returned by get_node($k) ). \n" ); + } + } + print( "present value of unique_node_key: ", $self->{unique_node_key}, "\n" ); } - - =head2 function incorporate_nodes() Given a list of nodes, add them to this tree's membership @@ -657,14 +667,14 @@ print("present value of unique_node_key: ", $self->{unique_node_key}, "\n"); =cut sub incorporate_nodes { - my $self = shift; - my @nodes = @_; - foreach my $n (@nodes) { - my $new_key = $self->get_unique_node_key(); - $n->set_tree($self); - $n->set_node_key($new_key); - $self->add_node_hash($n, $new_key); - } + my $self = shift; + my @nodes = @_; + foreach my $n (@nodes) { + my $new_key = $self->get_unique_node_key(); + $n->set_tree($self); + $n->set_node_key($new_key); + $self->add_node_hash( $n, $new_key ); + } } =head2 function incorporate_tree() @@ -674,10 +684,10 @@ Given a tree, incorporate that tree's nodes into this tree. This does not affec =cut sub incorporate_tree { - my $self = shift; - my $sub_tree = shift; - my @nodes = $sub_tree->get_root()->get_descendents(); - $self->incorporate_nodes(@nodes); + my $self = shift; + my $sub_tree = shift; + my @nodes = $sub_tree->get_root()->get_descendents(); + $self->incorporate_nodes(@nodes); } =head2 function make_binary() @@ -687,16 +697,17 @@ Inserts joint nodes at polyphetic points so that the tree is biphetic or monophe =cut sub make_binary { - my $self = shift; - my $node = shift; - $node ||= $self->get_root(); - my $new_bl = shift; - $new_bl ||= $self->get_min_branch_length(); -# warn "new bl in make_binary: $new_bl \n"; - $node->binarify_children($new_bl); - foreach($node->get_children()){ - $self->make_binary($_, $new_bl); - } + my $self = shift; + my $node = shift; + $node ||= $self->get_root(); + my $new_bl = shift; + $new_bl ||= $self->get_min_branch_length(); + + # warn "new bl in make_binary: $new_bl \n"; + $node->binarify_children($new_bl); + foreach ( $node->get_children() ) { + $self->make_binary( $_, $new_bl ); + } } =head2 function traverse() @@ -706,95 +717,183 @@ sub make_binary { Arguments: a function to be performed on each node, taking that node as its only argument Returns: nothing - Side effects: the function will be executed on each node object. - Description: not yet implemented... UPDATE: C. Carpita attempts + Side effects: The function will be executed on each node object + BEFORE descending into the node's child subtrees, i.e. preorder traversal. + Description: Synonym for preorder_traversal. =cut -sub traverse { - my $self = shift; - my $function = shift; - my $node = shift; - die "You did not pass a subroutine reference" unless (ref($function) eq "CODE"); - $node ||= $self->get_root(); +sub traverse { + my $self = shift; + my $function = shift; + my $node = shift; + die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" ); + $node ||= $self->get_root(); + + &$function($node); + foreach ( $node->get_children() ) { + $self->traverse( $function, $_ ); + } +} + +=head2 function preorder_traversal() + + Synopsis: $tree->preorder_traversal( sub{ my $node = shift; + $node->set_hidden() } ); + Arguments: a function to be performed on each node, taking + that node as its only argument + Returns: nothing + Side effects: The function will be executed on each node object. + Description: Preorder traversal. Call function on node, then descend into its child subtrees. + +=cut + +sub preorder_traversal { + my $self = shift; + my $function = shift; + my $node = shift; + die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" ); + $node ||= $self->get_root(); - &$function($node); + # print STDERR 'In traverse. before. ' . $node->get_name() . " "; + &$function($node); - foreach( $node->get_children() ){ - $self->traverse($function, $_); - } + # print STDERR "after. \n"; + foreach ( $node->get_children() ) { + $self->preorder_traversal( $function, $_ ); + } } -sub newick_shown_attributes { # just return the keys (attributes), so everything should work the same. - my $self = shift; - return keys %{$self->{newick_shown_attributes}}; +=head2 function inorder_traversal() + + Synopsis: $tree->inorder_traversal( sub{ my $node = shift; + $node->set_hidden() } ); + Arguments: a function to be performed on each node, taking + that node as its only argument + Returns: nothing + Side effects: inorder traversal. The function will be executed on each node object. + Description: inorder traversal. The function will be called on the node in between + descending to child subtrees. E.g. for binary tree, descend into left subtree, + call function on node, descend into right subtree. + +=cut + +sub inorder_traversal { + my $self = shift; + my $function = shift; + die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" ); + my $node = shift || $self->get_root(); # node is by default the root of tree. + + my @children = $node->get_children(); + if ( scalar @children > 0 ) { + my $leftmost_node = shift @children; + $self->inorder_traversal( $function, $leftmost_node ); + foreach my $child (@children) { + &$function($node); + $self->inorder_traversal( $function, $child ); + } + } else { + &$function($node); + } +} + +=head2 function postorder_traversal() + + Synopsis: $tree->postorder_traversal( sub{ my $node = shift; + $node->set_hidden() } ); + Arguments: a function to be performed on each node, taking + that node as its only argument + Returns: nothing + Side effects: The function will be executed on each node object. + Description: postorder traversal. Call function on node AFTER descending into subtrees. + +=cut + +sub postorder_traversal { + my $self = shift; + my $function = shift; + my $node = shift; + die "You did not pass a subroutine reference" unless ( ref($function) eq "CODE" ); + $node ||= $self->get_root(); # node is by default the root of tree. + + foreach ( $node->get_children() ) { + $self->postorder_traversal( $function, $_ ); + } + &$function($node); +} + +sub newick_shown_attributes { # just return list of keys (attributes), so everything should work the same. + my $self = shift; + return keys %{ $self->{newick_shown_attributes} }; } sub show_newick_attribute { - my $self = shift; - my $attr = shift; -# push(@{$self->{newick_shown_attributes}}, $attr); -$self->{newick_shown_attributes}->{$attr}++; + my $self = shift; + my $attr = shift; + + # push(@{$self->{newick_shown_attributes}}, $attr); + $self->{newick_shown_attributes}->{$attr}++; } sub unshow_newick_attribute { - my $self = shift; - my $attr = shift; + my $self = shift; + my $attr = shift; - delete $self->{newick_shown_attributes}->{$attr}; + delete $self->{newick_shown_attributes}->{$attr}; -# my $size = scalar @{$self->{newick_shown_attributes}}; -# foreach my $index (0..$size-1) { -# if ( ($self->{newick_shown_attributes})->[$index] eq $attr) { -# delete $self->{newick_shown_attributes}->[$index]; -# last; -# } -# } + # my $size = scalar @{$self->{newick_shown_attributes}}; + # foreach my $index (0..$size-1) { + # if ( ($self->{newick_shown_attributes})->[$index] eq $attr) { + # delete $self->{newick_shown_attributes}->[$index]; + # last; + # } + # } } -sub get_min_branch_length{ -my $self = shift; -return $self->{min_branch_length}; +sub get_min_branch_length { + my $self = shift; + return $self->{min_branch_length}; } -sub set_min_branch_length{ -my $self = shift; -$self->{min_branch_length} = shift; +sub set_min_branch_length { + my $self = shift; + $self->{min_branch_length} = shift; } -sub get_shown_branch_length_transformation{ -my $self = shift; -return $self->{shown_branch_length_transformation}; +sub get_shown_branch_length_transformation { + my $self = shift; + return $self->{shown_branch_length_transformation}; } -sub set_shown_branch_length_transformation{ -my $self = shift; -$self->{shown_branch_length_transformation} = shift; +sub set_shown_branch_length_transformation { + my $self = shift; + $self->{shown_branch_length_transformation} = shift; } -sub set_min_shown_branch_length{ -my $self = shift; -$self->{min_shown_branch_length} = shift; +sub set_min_shown_branch_length { + my $self = shift; + $self->{min_shown_branch_length} = shift; } -sub get_min_shown_branch_length{ -my $self = shift; -return $self->{min_shown_branch_length}; +sub get_min_shown_branch_length { + my $self = shift; + return $self->{min_shown_branch_length}; } -sub shown_branch_length_transformation_reset{ - my $self = shift; - $self->set_shown_branch_length_transformation(shift); - $self->{longest_branch_length} = undef; - $self->get_root()->calculate_distances_from_root(); +sub shown_branch_length_transformation_reset { + my $self = shift; + $self->set_shown_branch_length_transformation(shift); + $self->{longest_branch_length} = undef; + $self->get_root()->calculate_distances_from_root(); } -sub subtree_newick{ -my $self = shift; -my $subtree_root = shift || $self->get_root(); -#print "number of children: ", scalar $subtree_root->get_children(), "\n"; -my $newick_string = $subtree_root->recursive_subtree_newick(); -return $newick_string; +sub subtree_newick { + my $self = shift; + my $subtree_root = shift || $self->get_root(); + + #print "number of children: ", scalar $subtree_root->get_children(), "\n"; + my $newick_string = $subtree_root->recursive_subtree_newick(); + return $newick_string; } =head2 function generate_newick() @@ -807,15 +906,14 @@ return $newick_string; =cut sub generate_newick { - my $self = shift; - my $node = shift; + my $self = shift; + my $node = shift; my $show_root = shift; $node ||= $self->get_root(); return $node->recursive_generate_newick(''); } - =head2 function get_orthologs() Synopsis: my $ortho_trees_ref = $tree->get_orthologs(); @@ -833,53 +931,54 @@ sub generate_newick { =cut sub get_orthologs { - my $self=shift; - my $root_node = $self->get_root(); + my $self = shift; + my $root_node = $self->get_root(); - $root_node->recursive_set_leaf_count(); # set leaf_count attribute for all nodes - $root_node->recursive_set_leaf_species_count(); # set leaf_species_count attribute for all nodes - my $trees_ref = $root_node->collect_orthologs(); + $root_node->recursive_set_leaf_count(); # set leaf_count attribute for all nodes + $root_node->recursive_set_leaf_species_count(); # set leaf_species_count attribute for all nodes + my $trees_ref = $root_node->collect_orthologs(); - # can delete the leaf_count and leaf_species_count attributes here - my @node_list = $self->get_all_nodes(); - map($_->delete_attribute("leaf_count"), @node_list); - map($_->delete_attribute("leaf_species_count"), @node_list); - - return $trees_ref; + # can delete the leaf_count and leaf_species_count attributes here + my @node_list = $self->get_all_nodes(); + map( $_->delete_attribute("leaf_count"), @node_list ); + map( $_->delete_attribute("leaf_species_count"), @node_list ); + + return $trees_ref; } #This should recursively get all the subtree leaf species counts, and then run over everything again, -# comparing to the leaf counts for each species in the whole tree, to get the leaf species counts for the +# comparing to the leaf counts for each species in the whole tree, to get the leaf species counts for the # complement of each subtree. -sub set_all_subtree_and_complement_leaf_species_counts{ - my $self = shift; - my $leaf_species_count_hash = $self->get_root()->recursive_set_leaf_species_count(); - print "in set_all_subtree... ; number of species: ", scalar keys %$leaf_species_count_hash, "\n"; readline(); - $self->get_root()->recursive_set_leaf_species_count($leaf_species_count_hash); -} - -sub get_complement_ortho_group_candidates{ - my $self = shift; - my @node_list = $self->get_root()->recursive_subtree_node_list(); - foreach my $n (@node_list) { - my $comp_leaf_count = $self->get_root()->get_attribute("leaf_count") - $n->get_attribute("leaf_count"); - my $comp_leaf_species_count = $n->get_attribute("comp_leaf_species_count"); - if ($comp_leaf_count == $comp_leaf_species_count && $comp_leaf_count >1) { - print "complement to subtree : ", $n->get_name(), " is a og candidate \n"; - print "with $comp_leaf_count leaves and $comp_leaf_species_count leaf species \n"; - } - } +sub set_all_subtree_and_complement_leaf_species_counts { + my $self = shift; + my $leaf_species_count_hash = $self->get_root()->recursive_set_leaf_species_count(); + print "in set_all_subtree... ; number of species: ", scalar keys %$leaf_species_count_hash, "\n"; + readline(); + $self->get_root()->recursive_set_leaf_species_count($leaf_species_count_hash); +} + +sub get_complement_ortho_group_candidates { + my $self = shift; + my @node_list = $self->get_root()->recursive_subtree_node_list(); + foreach my $n (@node_list) { + my $comp_leaf_count = $self->get_root()->get_attribute("leaf_count") - $n->get_attribute("leaf_count"); + my $comp_leaf_species_count = $n->get_attribute("comp_leaf_species_count"); + if ( $comp_leaf_count == $comp_leaf_species_count && $comp_leaf_count > 1 ) { + print "complement to subtree : ", $n->get_name(), " is a og candidate \n"; + print "with $comp_leaf_count leaves and $comp_leaf_species_count leaf species \n"; + } + } } - -sub get_leaf_parents_list { +sub get_leaf_parents_list { my $self = shift; - foreach my $leaf ($self->get_leaf_list()) { - my $parent = $leaf->get_parent(); - ${$self->{leaf_parent_hash}}{$parent->get_node_key()}=$parent; + foreach my $leaf ( $self->get_leaf_list() ) { + my $parent = $leaf->get_parent(); + ${ $self->{leaf_parent_hash} }{ $parent->get_node_key() } = $parent; } + # return the parents as a neat list - return map (${$self->{leaf_parent_hash}}{$_}, keys(%{$self->{leaf_parent_hash}})); + return map ( ${ $self->{leaf_parent_hash} }{$_}, keys( %{ $self->{leaf_parent_hash} } ) ); } # helper functions that deal with the leaf list. It contains a list of nodes @@ -897,10 +996,10 @@ sub get_leaf_parents_list { =cut -sub get_leaf_list { - my $self=shift; - if (!exists($self->{leaf_list}) || !@{$self->{leaf_list}}) { $self->calculate_leaf_list(); } - return @{$self->{leaf_list}}; +sub get_leaf_list { + my $self = shift; + if ( !exists( $self->{leaf_list} ) || !@{ $self->{leaf_list} } ) { $self->calculate_leaf_list(); } + return @{ $self->{leaf_list} }; } =head2 get_leaves @@ -910,44 +1009,42 @@ sub get_leaf_list { =cut sub get_leaves { - my $self = shift; - return $self->get_leaf_list(); + my $self = shift; + return $self->get_leaf_list(); } -sub add_leaf_list { - my $self = shift; +sub add_leaf_list { + my $self = shift; my $leaf_node = shift; - push @{$self->{leaf_list}}, $leaf_node; + push @{ $self->{leaf_list} }, $leaf_node; } -sub clear_leaf_list { +sub clear_leaf_list { my $self = shift; - @{$self->{leaf_list}}=(); + @{ $self->{leaf_list} } = (); } -sub calculate_leaf_list { +sub calculate_leaf_list { my $self = shift; $self->clear_leaf_list(); - my @leaf_list = $self->get_root()->recursive_leaf_list(); - foreach my $leaf (@leaf_list) { - $self->add_leaf_list($leaf); + my @leaf_list = $self->get_root()->recursive_leaf_list(); + foreach my $leaf (@leaf_list) { + $self->add_leaf_list($leaf); } } - - -# the tree_topology_changed member variable contains the status of the -# topology of the tree. If the tree has been changed, it should be 1, -# otherwise it should be 0. +# the tree_topology_changed member variable contains the status of the +# topology of the tree. If the tree has been changed, it should be 1, +# otherwise it should be 0. # -sub get_tree_topology_changed { +sub get_tree_topology_changed { my $self = shift; return $self->{tree_topology_changed}; } -sub _set_tree_topology_changed { +sub _set_tree_topology_changed { my $self = shift; - $self->{tree_topology_changed}=shift; + $self->{tree_topology_changed} = shift; } =head2 function get_name() @@ -960,8 +1057,8 @@ sub _set_tree_topology_changed { =cut -sub get_name { - my $self=shift; +sub get_name { + my $self = shift; return $self->{name}; } @@ -976,9 +1073,9 @@ sub get_name { =cut -sub set_name { - my $self=shift; - $self->{name}=shift; +sub set_name { + my $self = shift; + $self->{name} = shift; } =head2 function get_longest_root_leaf_length() @@ -991,13 +1088,14 @@ sub set_name { =cut -sub get_longest_root_leaf_length { - my $self=shift; - if (!$self->{longest_branch_length}) { - $self->set_longest_root_leaf_length($self->calculate_longest_root_leaf_length()); - } -# print "in get_longest_root_leaf_length: ", $self->{longest_branch_length}, "\n"; - return $self->{longest_branch_length}; +sub get_longest_root_leaf_length { + my $self = shift; + if ( !$self->{longest_branch_length} ) { + $self->set_longest_root_leaf_length( $self->calculate_longest_root_leaf_length() ); + } + + # print "in get_longest_root_leaf_length: ", $self->{longest_branch_length}, "\n"; + return $self->{longest_branch_length}; } =head2 function set_longest_root_leaf_length() @@ -1012,21 +1110,21 @@ sub get_longest_root_leaf_length { =cut -sub set_longest_root_leaf_length { - my $self=shift; - $self->{longest_branch_length}=shift; +sub set_longest_root_leaf_length { + my $self = shift; + $self->{longest_branch_length} = shift; } sub calculate_longest_root_leaf_length { - my $self=shift; - my $largest = 0; - foreach my $leaf ($self->get_leaf_list()) { - my $dist = $leaf->get_dist_from_root(); - if ($dist > $largest) { - $largest=$dist; - } - } - return $largest; + my $self = shift; + my $largest = 0; + foreach my $leaf ( $self->get_leaf_list() ) { + my $dist = $leaf->get_dist_from_root(); + if ( $dist > $largest ) { + $largest = $dist; + } + } + return $largest; } =head2 function retrieve_longest_branch_node() @@ -1039,15 +1137,12 @@ sub calculate_longest_root_leaf_length { =cut -sub retrieve_longest_branch_node { - my $self=shift; - my $longest_branch_node = $self->get_root()->_recursive_longest_branch_node(CXGN::Phylo::Node->new()); +sub retrieve_longest_branch_node { + my $self = shift; + my $longest_branch_node = $self->get_root()->_recursive_longest_branch_node( CXGN::Phylo::Node->new() ); return $longest_branch_node; } - - - =head2 APPEARANCE OF THE TREE =head2 function get_show_labels() @@ -1060,8 +1155,8 @@ sub retrieve_longest_branch_node { =cut -sub get_show_labels { - my $self=shift; +sub get_show_labels { + my $self = shift; return $self->{show_labels}; } @@ -1076,22 +1171,32 @@ sub get_show_labels { =cut -sub set_show_labels { - my $self=shift; - $self->{show_labels}=shift; +sub set_show_labels { + my $self = shift; + $self->{show_labels} = shift; } - -sub get_show_species_in_label{ +sub get_show_species_in_label { my $self = shift; return $self->{show_species_in_labels}; } -sub set_show_species_in_label{ +sub set_show_species_in_label { my $self = shift; $self->{show_species_in_labels} = shift; } +sub get_id_taxon_map{ + my $self = shift; + return $self->{'id_taxon_map'}; +} + +sub set_id_taxon_map{ + my $self = shift; + my $id_taxon_map = shift; + $self->{'id_taxon_map'} = $id_taxon_map; + return; +} =head2 accessors get_line_color(), set_line_color() @@ -1104,17 +1209,16 @@ sub set_show_species_in_label{ =cut -sub get_line_color { - my $self=shift; - return @{$self->{line_color}}; +sub get_line_color { + my $self = shift; + return @{ $self->{line_color} }; } -sub set_line_color { - my $self=shift; - @{$self->{line_color}}=@_; +sub set_line_color { + my $self = shift; + @{ $self->{line_color} } = @_; } - =head2 accessors get_bgcolor(), set_bgcolor() Synopsis: $tree->set_bgcolor(255, 255, 255); @@ -1125,14 +1229,14 @@ sub set_line_color { =cut -sub get_bgcolor { - my $self=shift; - return @{$self->{bgcolor}}; +sub get_bgcolor { + my $self = shift; + return @{ $self->{bgcolor} }; } -sub set_bgcolor { - my $self=shift; - @{$self->{bgcolor}}=@_; +sub set_bgcolor { + my $self = shift; + @{ $self->{bgcolor} } = @_; } =head2 accessors get_hilite_color(), set_hilite_color() @@ -1145,14 +1249,14 @@ sub set_bgcolor { =cut -sub get_hilite_color { - my $self=shift; - return @{$self->{hilite_color}}; +sub get_hilite_color { + my $self = shift; + return @{ $self->{hilite_color} }; } -sub set_hilite_color { - my $self=shift; - @{$self->{hilite_color}}=@_; +sub set_hilite_color { + my $self = shift; + @{ $self->{hilite_color} } = @_; } =head2 function get_node_by_name() @@ -1169,48 +1273,48 @@ sub set_hilite_color { =cut -sub get_node_by_name { +sub get_node_by_name { my $self = shift; my $name = shift; - foreach my $n ($self->get_all_nodes()){ - return $n if ($n->get_name() eq $name); - } - return undef; + foreach my $n ( $self->get_all_nodes() ) { + return $n if ( $n->get_name() eq $name ); + } + return undef; } #returns a list of nodes matching a certain reg expression depending on the argument -sub search_node_name { - my $self = shift; - my $term = shift; +sub search_node_name { + my $self = shift; + my $term = shift; my @nodes = (); - foreach my $n ($self->get_all_nodes()) { - my $node_name = $n->get_name(); - if ($node_name =~ /\Q$term\E/i) { - push @nodes, $n; - } + foreach my $n ( $self->get_all_nodes() ) { + my $node_name = $n->get_name(); + if ( $node_name =~ /\Q$term\E/i ) { + push @nodes, $n; + } } return @nodes; } #returns a list of nodes matching a certain reg expression depending on the argument -sub search_label_name { - my $self = shift; - my $term = shift; - my @nodes = (); - foreach my $n ($self->get_all_nodes()) { - my $label_name = $n->get_label()->get_name(); - if ($term =~ m/m\/(.*)\//) { # if enter m/stuff/ then treat stuff as perl regex - my $match = $1; - if ($match && $label_name =~ /$match/) { - push @nodes, $n; - } - } else { - if ($term && $label_name =~ /\Q$term\E/i) { - push @nodes, $n; - } - } - } - return @nodes; +sub search_label_name { + my $self = shift; + my $term = shift; + my @nodes = (); + foreach my $n ( $self->get_all_nodes() ) { + my $label_name = $n->get_label()->get_name(); + if ( $term =~ m/m\/(.*)\// ) { # if enter m/stuff/ then treat stuff as perl regex + my $match = $1; + if ( $match && $label_name =~ /$match/ ) { + push @nodes, $n; + } + } else { + if ( $term && $label_name =~ /\Q$term\E/i ) { + push @nodes, $n; + } + } + } + return @nodes; } =head2 function compare() @@ -1261,12 +1365,13 @@ Author: Tom York =cut -sub compare_rooted{ - my $self = shift; - my $other_tree = shift; -my $compare_field = shift; -# print STDOUT "in compare_rooted. compare_field: $compare_field \n"; - return $self->get_root()->compare_subtrees($other_tree->get_root(), $compare_field); +sub compare_rooted { + my $self = shift; + my $other_tree = shift; + my $compare_field = shift; + + # print STDOUT "in compare_rooted. compare_field: $compare_field \n"; + return $self->get_root()->compare_subtrees( $other_tree->get_root(), $compare_field ); } =head2 function compare_unrooted @@ -1286,35 +1391,35 @@ my $compare_field = shift; =cut -sub compare_unrooted { - my $self = shift; - my $other_tree = shift; - my $compare_field = shift; # to control comparison of names (default) or species ("species") - # copy the trees into temporary trees, so that the trees can - # be manipulated (rerooted, collapsed) without changing the original trees. - # - # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n"; - my $tree1 = $self->copy(); - my $tree2 = $other_tree->copy(); - - # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2 - - my $leaf1 = $tree1->get_root()->recursive_get_a_leaf(); - my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name()); - - if (!$corresponding_leaf) { - print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n"); - return 0; - } +sub compare_unrooted { + my $self = shift; + my $other_tree = shift; + my $compare_field = shift; # to control comparison of names (default) or species ("species") + # copy the trees into temporary trees, so that the trees can + # be manipulated (rerooted, collapsed) without changing the original trees. + # + # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n"; + my $tree1 = $self->copy(); + my $tree2 = $other_tree->copy(); + + # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2 + + my $leaf1 = $tree1->get_root()->recursive_get_a_leaf(); + my $corresponding_leaf = $tree2->get_node_by_name( $leaf1->get_name() ); + + if ( !$corresponding_leaf ) { + print( "in compare_unrooted. leaf1 name: ", + $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n" ); + return 0; + } - # reset roots of trees to the two corresponding leaves: - $tree1->reset_root($leaf1); - $tree2->reset_root($corresponding_leaf); + # reset roots of trees to the two corresponding leaves: + $tree1->reset_root($leaf1); + $tree2->reset_root($corresponding_leaf); - return $tree1->get_root()->compare_subtrees($tree2->get_root(), $compare_field); + return $tree1->get_root()->compare_subtrees( $tree2->get_root(), $compare_field ); } - =head2 function get_layout(), set_layout() Synopsis: $tree->set_layout($layout) @@ -1326,17 +1431,16 @@ sub compare_unrooted { =cut -sub get_layout { - my $self=shift; +sub get_layout { + my $self = shift; return $self->{layout}; } -sub set_layout { - my $self=shift; - $self->{layout}=shift; +sub set_layout { + my $self = shift; + $self->{layout} = shift; } - =head2 function layout() Synopsis: $tree->layout() @@ -1348,13 +1452,11 @@ sub set_layout { =cut - -sub layout { +sub layout { my $self = shift; $self->get_layout()->layout(); } - =head2 accessors get_renderer(), set_renderer() Synopsis: $tree->set_renderer($renderer) @@ -1365,14 +1467,14 @@ sub layout { =cut -sub get_renderer { - my $self=shift; +sub get_renderer { + my $self = shift; return $self->{renderer}; } -sub set_renderer { - my $self=shift; - $self->{renderer}=shift; +sub set_renderer { + my $self = shift; + $self->{renderer} = shift; } =head2 function render() @@ -1388,21 +1490,21 @@ sub set_renderer { =cut -sub render { - my $self = shift; - my $print_all_labels=shift; +sub render { + my $self = shift; + my $print_all_labels = shift; $self->get_renderer()->render($print_all_labels); } sub standard_layout { - my $self = shift; - my $layout = CXGN::Phylo::Layout->new($self); - $layout->set_top_margin(20); - $layout->set_bottom_margin(20); - $layout->set_image_height(400); - $layout->set_image_width(700); - $self->set_layout($layout); - $self->layout(); + my $self = shift; + my $layout = CXGN::Phylo::Layout->new($self); + $layout->set_top_margin(20); + $layout->set_bottom_margin(20); + $layout->set_image_height(400); + $layout->set_image_width(700); + $self->set_layout($layout); + $self->layout(); } =head2 function render_png() @@ -1417,19 +1519,18 @@ sub standard_layout { =cut -sub render_png { - my $self = shift; - my $file = shift; - my $print_all_labels= shift; ## Boolean for printing non-leaf node labels +sub render_png { + my $self = shift; + my $file = shift; + my $print_all_labels = shift; ## Boolean for printing non-leaf node labels $self->layout(); my $png_string = $self->render($print_all_labels); - if(defined $file){ - open (my $T, ">$file") || die "PNG_tree_renderer: render_png(): Can't open file $file."; - print $T $png_string; - close $T ; - } - else { - return $png_string; + if ( defined $file ) { + open( my $T, ">$file" ) || die "PNG_tree_renderer: render_png(): Can't open file $file."; + print $T $png_string; + close $T; + } else { + return $png_string; } } @@ -1445,33 +1546,43 @@ sub render_png { sub collapse_tree { my $self = shift; + # first, collapse all nodes that have only one child onto the # parent node # -#print STDERR "before rec..coll...single_nodes\n"; + # print STDERR "Z0 BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n"; + + #print STDERR "before rec..coll...single_nodes\n"; + # print STDERR "newick: ", $self->generate_newick(), "\n"; $self->get_root()->recursive_collapse_single_nodes(); -#print STDERR "after rec..coll...single_nodes\n"; +# print STDERR "Z BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n"; + + #print STDERR "after rec..coll...single_nodes\n"; $self->recalculate_tree_data(); +# print STDERR "ZZ BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n"; - # then, collapse all nodes that have branch lengths of zero + # then, collapse all nodes that have branch lengths of zero # with their parent node # -#print STDERR "before rec..coll...zero_branches\n"; + #print STDERR "before rec..coll...zero_branches\n"; $self->get_root()->recursive_collapse_zero_branches(); -#print STDERR "after rec..coll...zero_branches\n"; + + #print STDERR "after rec..coll...zero_branches\n"; +# print STDERR "ZZZ BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n"; # let's re-calculate the tree's properties # $self->recalculate_tree_data(); } -sub collapse_unique_species_subtrees { +sub collapse_unique_species_subtrees { my $self = shift; - # calculate, for each node, how many nodes are beneath it. + + # calculate, for each node, how many nodes are beneath it. # This information can then be accessed using the # $node-> get_subtree_node_count() function. # - # $self->get_root()->calculate_subtree_node_count(); + # $self->get_root()->calculate_subtree_node_count(); # calculate, for each node, how many different species are in the leaves of the subtree beneath it. # @@ -1495,12 +1606,12 @@ sub collapse_unique_species_subtrees { =cut -sub find_point_furthest_from_leaves{ - my $self = shift; - $self->set_min_dist_to_leaf(); - my @furthest_point = $self->get_root()->recursive_find_point_furthest_from_leaves(); - $furthest_point[1] -= $furthest_point[0]->get_attribute("min_dist_to_leaf"); - return @furthest_point; +sub find_point_furthest_from_leaves { + my $self = shift; + $self->set_min_dist_to_leaf(); + my @furthest_point = $self->get_root()->recursive_find_point_furthest_from_leaves(); + $furthest_point[1] -= $furthest_point[0]->get_attribute("min_dist_to_leaf"); + return @furthest_point; } =head2 function find_point_closest_to_furthest_leaf() @@ -1517,37 +1628,37 @@ sub find_point_furthest_from_leaves{ =cut # returns a list containing a node object, and the distance of the point above that node -sub find_point_closest_to_furthest_leaf{ - my $self = shift; - $self->get_root()->recursive_set_max_dist_to_leaf_in_subtree(); - - my @nodes = $self->get_root()->recursive_subtree_node_list(); - push @nodes, $self->get_root(); # we want the root in our list - - my @sorted_nodes = sort - { $a->get_max_leaf_leaf_pathlength_in_subtree_thru_node() - <=> - $b->get_max_leaf_leaf_pathlength_in_subtree_thru_node() } - @nodes; - - # using attribute "lptl_child" (longest path to leaf child) follow the longest path to leaf, - # until you reach the midpoint of the longest leaf to leaf path - my $current_node = pop @sorted_nodes; - my $distance_to_go = 0.5*($current_node->get_attribute("dist_to_leaf_longest") - $current_node->get_attribute("dist_to_leaf_next_longest")); - for (;;) { - my $next_node = $current_node->get_attribute("lptl_child"); - my $branch_length = $next_node->get_branch_length(); - if ($branch_length >= $distance_to_go) { - return ($next_node, $branch_length - $distance_to_go); - } else { - $distance_to_go -= $branch_length; - $current_node = $next_node; - } - } +sub find_point_closest_to_furthest_leaf { + my $self = shift; + $self->get_root()->recursive_set_max_dist_to_leaf_in_subtree(); + + my @nodes = $self->get_root()->recursive_subtree_node_list(); + push @nodes, $self->get_root(); # we want the root in our list + + my @sorted_nodes = sort { + $a->get_max_leaf_leaf_pathlength_in_subtree_thru_node() + <=> $b->get_max_leaf_leaf_pathlength_in_subtree_thru_node() + } @nodes; + + # using attribute "lptl_child" (longest path to leaf child) follow the longest path to leaf, + # until you reach the midpoint of the longest leaf to leaf path + my $current_node = pop @sorted_nodes; + my $distance_to_go = + 0.5 * + ( $current_node->get_attribute("dist_to_leaf_longest") - + $current_node->get_attribute("dist_to_leaf_next_longest") ); + for ( ; ; ) { + my $next_node = $current_node->get_attribute("lptl_child"); + my $branch_length = $next_node->get_branch_length(); + if ( $branch_length >= $distance_to_go ) { + return ( $next_node, $branch_length - $distance_to_go ); + } else { + $distance_to_go -= $branch_length; + $current_node = $next_node; + } + } } - - =head2 function reset_root_to_point_on_branch() Synopsis: $t->reset_root_to_point_on_branch($anode, $distance) @@ -1561,16 +1672,14 @@ sub find_point_closest_to_furthest_leaf{ Author: Tom York. =cut -sub reset_root_to_point_on_branch{ - my $self = shift; - my ($child_of_new_node, $dist_above) = @_; - - my $new_node = $child_of_new_node->add_parent($dist_above); # goes - my $former_root = $self->get_root(); - $self->reset_root($new_node); - - $self->collapse_tree(); +sub reset_root_to_point_on_branch { + my $self = shift; + my ( $node, $dist_above ) = @_; + return if ( $node->is_root() ); # if desired root node is already the root do nothing. + my $new_node = $node->add_parent($dist_above); # goes + $self->reset_root($new_node); + $self->collapse_tree(); } =head2 function set_min_dist_to_leaf() @@ -1586,13 +1695,12 @@ sub reset_root_to_point_on_branch{ =cut -sub set_min_dist_to_leaf{ - my $self = shift; - $self->get_root()->recursive_set_min_dist_to_leaf(); - $self->get_root()->recursive_propagate_mdtl(); +sub set_min_dist_to_leaf { + my $self = shift; + $self->get_root()->recursive_set_min_dist_to_leaf(); + $self->get_root()->recursive_propagate_mdtl(); } - =head2 function min_leaf_dist_variance_point() Synopsis: $t->min_leaf_dist_variance_point() @@ -1609,29 +1717,28 @@ sub set_min_dist_to_leaf{ =cut -sub min_leaf_dist_variance_point{ - my $self = shift; +sub min_leaf_dist_variance_point { + my $self = shift; - $self->get_root()->recursive_set_dl_dlsqr_sums_down(); - $self->get_root()->recursive_set_dl_dlsqr_sums_up(); + $self->get_root()->recursive_set_dl_dlsqr_sums_down(); + $self->get_root()->recursive_set_dl_dlsqr_sums_up(); - my @node_list = $self->get_root()->recursive_subtree_node_list(); - my $opt_node = shift @node_list; - my ($opt_dist_above, $opt_var) = $opt_node->min_leaf_dist_variance_point(); + my @node_list = $self->get_root()->recursive_subtree_node_list(); + my $opt_node = shift @node_list; + my ( $opt_dist_above, $opt_var ) = $opt_node->min_leaf_dist_variance_point(); - foreach my $n (@node_list) { - my ($da, $var) = $n->min_leaf_dist_variance_point(); - if ($var < $opt_var) { - $opt_node = $n; - $opt_dist_above = $da; - $opt_var = $var; - } - } - $self->get_root()->recursive_delete_dl_dlsqr_attributes(); - return ($opt_node, $opt_dist_above, $opt_var); + foreach my $n (@node_list) { + my ( $da, $var ) = $n->min_leaf_dist_variance_point(); + if ( $var < $opt_var ) { + $opt_node = $n; + $opt_dist_above = $da; + $opt_var = $var; + } + } + $self->get_root()->recursive_delete_dl_dlsqr_attributes(); + return ( $opt_node, $opt_dist_above, $opt_var ); } - =head2 function test_tree_node_hash() Synopsis: $t->test_tree_node_hash() @@ -1656,34 +1763,35 @@ sub min_leaf_dist_variance_point{ =cut -sub test_tree_node_hash{ - my $ok1 = 1; my $ok2 = 1; my $ok3 = 1; - my $self = shift; - my $node_hashref = $self->{node_hash}; - my $root = $self->get_root(); - my @node_list = $root->recursive_subtree_node_list(); - push @node_list, $root; - my %nodekeys; - - foreach my $n (@node_list) { # test that each node in this list is found in the tree's node hash - my $node_key = $n->get_node_key(); - $nodekeys{$node_key}++; - if (!defined $node_hashref->{$node_key}) { # a node in node_list is not in the hash. - $ok1 = 0; - } - } - - if (scalar keys %nodekeys != scalar @node_list) { # test that each node in node_list has a distinct key - $ok2 = 0; - } - - my @node_keys = keys (%$node_hashref); # test that each key in node hash is - if (scalar @node_keys != scalar @node_list) { - $ok3 = 0; - } - return $ok1*$ok2*$ok3; -} +sub test_tree_node_hash { + my $ok1 = 1; + my $ok2 = 1; + my $ok3 = 1; + my $self = shift; + my $node_hashref = $self->{node_hash}; + my $root = $self->get_root(); + my @node_list = $root->recursive_subtree_node_list(); + push @node_list, $root; + my %nodekeys; + + foreach my $n (@node_list) { # test that each node in this list is found in the tree's node hash + my $node_key = $n->get_node_key(); + $nodekeys{$node_key}++; + if ( !defined $node_hashref->{$node_key} ) { # a node in node_list is not in the hash. + $ok1 = 0; + } + } + if ( scalar keys %nodekeys != scalar @node_list ) { # test that each node in node_list has a distinct key + $ok2 = 0; + } + + my @node_keys = keys(%$node_hashref); # test that each key in node hash is + if ( scalar @node_keys != scalar @node_list ) { + $ok3 = 0; + } + return $ok1 * $ok2 * $ok3; +} =head2 function test_tree_parents_and_children() @@ -1697,67 +1805,70 @@ sub test_tree_node_hash{ =cut -sub test_tree_parents_and_children{ - my $self = shift; - my $ok1 = $self->test_tree_nodes_are_parents_of_their_children(); - my $ok2 = $self->test_tree_nodes_are_children_of_their_parents(); - return ($ok1 && $ok2); +sub test_tree_parents_and_children { + my $self = shift; + my $ok1 = $self->test_tree_nodes_are_parents_of_their_children(); + my $ok2 = $self->test_tree_nodes_are_children_of_their_parents(); + return ( $ok1 && $ok2 ); } -sub test_tree{ - my $self = shift; - return $self->test_tree_node_hash() && $self->test_tree_parents_and_children(); +sub test_tree { + my $self = shift; + return $self->test_tree_node_hash() && $self->test_tree_parents_and_children(); } # tests that for all nodes n, each child of n has n as its parent. -sub test_tree_nodes_are_parents_of_their_children{ - my $self = shift; - my $root = $self->get_root(); - my @node_list = $root->recursive_subtree_node_list(); - push @node_list, $root; - my $ok = 1; - - foreach my $n (@node_list) { - my @children = $n->get_children(); - my $node_key = $n->get_node_key(); - foreach my $c (@children) { - if(! defined $c->get_parent()){ - print("child node has undefined parent. \n"); $n->print_node(); $c->print_node(); - $ok = 0; - } elsif ($c->get_parent()->get_node_key() != $node_key) { - print("child node has wrong parent. \n"); $n->print_node(); $c->print_node(); - $ok = 0; - } - } - } - return $ok; +sub test_tree_nodes_are_parents_of_their_children { + my $self = shift; + my $root = $self->get_root(); + my @node_list = $root->recursive_subtree_node_list(); + push @node_list, $root; + my $ok = 1; + + foreach my $n (@node_list) { + my @children = $n->get_children(); + my $node_key = $n->get_node_key(); + foreach my $c (@children) { + if ( !defined $c->get_parent() ) { + print("child node has undefined parent. \n"); + $n->print_node(); + $c->print_node(); + $ok = 0; + } elsif ( $c->get_parent()->get_node_key() != $node_key ) { + print("child node has wrong parent. \n"); + $n->print_node(); + $c->print_node(); + $ok = 0; + } + } + } + return $ok; } - # tests that for all nodes n, that if n has a parent, then n is among the children of that parent. -sub test_tree_nodes_are_children_of_their_parents{ - my $self = shift; - my $root = $self->get_root(); - my @node_list = $root->recursive_subtree_node_list(); - push @node_list, $root; - my $ok = 1; - - foreach my $n (@node_list) { # test that $n is among the children of its parent - my $p = $n->get_parent(); - if (defined $p) { # if not defined, do no test for this node - my @children = $p->get_children(); - my $this_n_ok = 0; - foreach my $c (@children) { - if ($c->get_node_key() == $n->get_node_key()) { - $this_n_ok = 1; - last; - } - } - if(! $this_n_ok){ print("This node not among the children of its parent: \n"), $n->print_node(); } - $ok &&= $this_n_ok; - } - } - return $ok; +sub test_tree_nodes_are_children_of_their_parents { + my $self = shift; + my $root = $self->get_root(); + my @node_list = $root->recursive_subtree_node_list(); + push @node_list, $root; + my $ok = 1; + + foreach my $n (@node_list) { # test that $n is among the children of its parent + my $p = $n->get_parent(); + if ( defined $p ) { # if not defined, do no test for this node + my @children = $p->get_children(); + my $this_n_ok = 0; + foreach my $c (@children) { + if ( $c->get_node_key() == $n->get_node_key() ) { + $this_n_ok = 1; + last; + } + } + if ( !$this_n_ok ) { print("This node not among the children of its parent: \n"), $n->print_node(); } + $ok &&= $this_n_ok; + } + } + return $ok; } =head2 function orthologs() @@ -1780,31 +1891,32 @@ sub test_tree_nodes_are_children_of_their_parents{ =cut -sub orthologs{ - my $self = shift; # tree - my $species_t = shift; # species tree; if undefined, - my $cssst = shift; # switch to collapse single-species subtrees to a single node - my $qRFd_max = shift; - if (!defined $qRFd_max) { - $qRFd_max = 0; - } - if ($cssst) { - $self->collapse_unique_species_subtrees(); - } - # should we collapse the tree here? - my $ortho_trees_ref = $self->get_orthologs(); - my @ortho_groups=(); # a list of Ortholog_group object that will contain the results - - # go through all the ortho_trees and compare to the species tree - # - foreach my $ortho_t (@$ortho_trees_ref) { - my $ortho_group = CXGN::Phylo::Ortholog_group->new($ortho_t, $species_t, $qRFd_max); - if ($ortho_group->get_ortholog_tree()->get_leaf_count()>1) { - push @ortho_groups, $ortho_group; - } - } # end of foreach my $ortho_t (@$ortho_trees_ref) { - return @ortho_groups; -} # end of sub orthologs +sub orthologs { + my $self = shift; # tree + my $species_t = shift; # species tree; if undefined, + my $cssst = shift; # switch to collapse single-species subtrees to a single node + my $qRFd_max = shift; + if ( !defined $qRFd_max ) { + $qRFd_max = 0; + } + if ($cssst) { + $self->collapse_unique_species_subtrees(); + } + + # should we collapse the tree here? + my $ortho_trees_ref = $self->get_orthologs(); + my @ortho_groups = (); # a list of Ortholog_group object that will contain the results + + # go through all the ortho_trees and compare to the species tree + # + foreach my $ortho_t (@$ortho_trees_ref) { + my $ortho_group = CXGN::Phylo::Ortholog_group->new( $ortho_t, $species_t, $qRFd_max ); + if ( $ortho_group->get_ortholog_tree()->get_leaf_count() > 1 ) { + push @ortho_groups, $ortho_group; + } + } # end of foreach my $ortho_t (@$ortho_trees_ref) { + return @ortho_groups; +} # end of sub orthologs =head2 function set_missing_species_from_names() @@ -1822,14 +1934,15 @@ sub orthologs{ =cut -sub set_missing_species_from_names{ - my $self = shift; - foreach my $n ($self->get_leaf_list()) { - # print("defined \$n->get_species():{", defined $n->get_species(), "} ,\$n->get_species():{", $n->get_species(), "}\n"); - if (!$n->get_species()) { - $n->set_species($n->determine_species_from_name()); - } - } +sub set_missing_species_from_names { + my $self = shift; + foreach my $n ( $self->get_leaf_list() ) { + +# print("defined \$n->get_species():{", defined $n->get_species(), "} ,\$n->get_species():{", $n->get_species(), "}\n"); + if ( !$n->get_species() ) { + $n->set_species( $n->determine_species_from_name() ); + } + } } =head2 function impose_branch_length_minimum() @@ -1845,38 +1958,37 @@ sub set_missing_species_from_names{ =cut -sub impose_branch_length_minimum{ - my $self = shift; - my $minimum_bl = shift; - $minimum_bl ||= $self->get_min_branch_length(); - foreach my $n ($self->get_all_nodes()) { - unless (defined $n->get_branch_length() and $n->get_branch_length() > $minimum_bl) { - $n->set_branch_length($minimum_bl); - } - } - $self->get_root()->set_branch_length(0.0); # leave this at 0 +sub impose_branch_length_minimum { + my $self = shift; + my $minimum_bl = shift; + $minimum_bl ||= $self->get_min_branch_length(); + foreach my $n ( $self->get_all_nodes() ) { + unless ( defined $n->get_branch_length() and $n->get_branch_length() > $minimum_bl ) { + $n->set_branch_length($minimum_bl); + } + } + $self->get_root()->set_branch_length(0.0); # leave this at 0 } - -sub set_show_standard_species{ - my $self = shift; - $self->{show_standard_species} = shift; -} -sub get_show_standard_species{ - my $self = shift; - return $self->{show_standard_species}; +sub set_show_standard_species { + my $self = shift; + $self->{show_standard_species} = shift; } -sub set_species_standardizer{ - my $self = shift; - $self->{species_standardizer} = shift; +sub get_show_standard_species { + my $self = shift; + return $self->{show_standard_species}; } -sub get_species_standardizer{ - my $self = shift; - return $self->{species_standardizer}; +sub set_species_standardizer { + my $self = shift; + $self->{species_standardizer} = shift; } +sub get_species_standardizer { + my $self = shift; + return $self->{species_standardizer}; +} =head2 function update_label_names() @@ -1889,21 +2001,24 @@ sub get_species_standardizer{ =cut -sub update_label_names{ - my $self = shift; - my $show_spec = $self->get_show_species_in_label(); -foreach my $n ($self->get_all_nodes()) { -# print "node: ", $n->get_name(), " impl names: ", join("\t", @{$n->get_implicit_names()}), "\n"; - my $n_leaves = scalar @{$n->get_implicit_names()}; - my $label_text = $n->get_name(); - # print STDERR "in update_label_names. $n_leaves, [", $n->get_name(), "][", $label_text, "] \n"; - if ($show_spec) { - my $species_text = $n->get_shown_species(); - # print STDERR "species text: ", $n->get_shown_species(), " is leaf:[", $n->is_leaf(), "]\n"; - $label_text .= " [".$species_text."]" if(defined $species_text); - } - $n->get_label()->set_name($label_text); - } +sub update_label_names { + my $self = shift; + my $show_spec = $self->get_show_species_in_label(); + foreach my $n ( $self->get_all_nodes() ) { + + # print "node: ", $n->get_name(), " impl names: ", join("\t", @{$n->get_implicit_names()}), "\n"; + # my $n_leaves = scalar @{$n->get_implicit_names()}; + my $label_text = $n->get_name(); + + # print STDERR "in update_label_names. $n_leaves, [", $n->get_name(), "][", $label_text, "] \n"; + if ($show_spec) { + my $species_text = $n->get_shown_species(); + + # print STDERR "species text: ", $n->get_shown_species(), " is leaf:[", $n->is_leaf(), "]\n"; + $label_text .= " [" . $species_text . "]" if ( defined $species_text ); + } + $n->get_label()->set_name($label_text); + } } =head2 function prune_nameless_leaves() @@ -1916,56 +2031,58 @@ foreach my $n ($self->get_all_nodes()) { =cut -sub prune_nameless_leaves{ - - my $self = shift; - my @leaf_list = $self->get_root()->recursive_leaf_list(); - my $count_leaves_deleted = 0; - $self->get_root()->recursive_implicit_names(); # is this needed? - foreach my $l (@leaf_list) { - if ($l->get_name()) { # non-empty string. OK. - } else { - # print STDERR "Warning. Leaf node with key: ", $l->get_node_key(), " has empty or undefined name. Deleting nameless node. \n"; - $self->del_node($l); - $self->collapse_tree(); - $count_leaves_deleted++; - } - } - return $count_leaves_deleted; +sub prune_nameless_leaves { + + my $self = shift; + my @leaf_list = $self->get_root()->recursive_leaf_list(); + my $count_leaves_deleted = 0; + $self->get_root()->recursive_implicit_names(); # is this needed? + foreach my $l (@leaf_list) { + if ( $l->get_name() ) { # non-empty string. OK. + } else { + +# print STDERR "Warning. Leaf node with key: ", $l->get_node_key(), " has empty or undefined name. Deleting nameless node. \n"; + $self->del_node($l); + $self->collapse_tree(); + $count_leaves_deleted++; + } + } + return $count_leaves_deleted; } # return key, node pair corresponding to the implicit name given as argument. -sub node_from_implicit_name_string{ - #searches tree until the node with the specified implicit name string (tab separated) is found - my $self = shift; - my $in_string = shift; - if (! scalar $self->get_root()->get_implicit_names() > 0) { - $self->get_root()->recursive_implicit_names(); - } +sub node_from_implicit_name_string { - foreach my $k ($self->get_all_node_keys()) { - my $n = $self->get_node($k); - my $node_impl_name = join("\t", @{$n->get_implicit_names()}); - # print STDERR "node impl name: $node_impl_name \n"; - if ($node_impl_name eq $in_string) { - return ($k, $n); - } - } + #searches tree until the node with the specified implicit name string (tab separated) is found + my $self = shift; + my $in_string = shift; + if ( !scalar $self->get_root()->get_implicit_names() > 0 ) { + $self->get_root()->recursive_implicit_names(); + } -# print STDERR "In Tree::node_from_implicit_name_string. Node not found which matches specified string: $in_string \n"; -# print STDERR $self->generate_newick(), "\n"; - return (undef, undef); -} + foreach my $k ( $self->get_all_node_keys() ) { + my $n = $self->get_node($k); + my $node_impl_name = join( "\t", @{ $n->get_implicit_names() } ); + # print STDERR "node impl name: $node_impl_name \n"; + if ( $node_impl_name eq $in_string ) { + return ( $k, $n ); + } + } -sub leaf_species_string{ - my $self = shift; -my $str = "species, standard species \n"; - foreach my $l ($self->get_leaf_list()) { - my $std_species = ($l->get_standard_species())? $l->get_standard_species(): 'std_species_undefined'; - $str .= $l->get_species() . " " . $std_species . "\n"; - } - return $str; + # print STDERR "In Tree::node_from_implicit_name_string. Node not found which matches specified string: $in_string \n"; + # print STDERR $self->generate_newick(), "\n"; + return ( undef, undef ); +} + +sub leaf_species_string { + my $self = shift; + my $str = "species, standard species \n"; + foreach my $l ( $self->get_leaf_list() ) { + my $std_species = ( $l->get_standard_species() ) ? $l->get_standard_species() : 'std_species_undefined'; + $str .= $l->get_species() . " " . $std_species . "\n"; + } + return $str; } =head2 function quasiRF_distance @@ -2000,95 +2117,102 @@ my $str = "species, standard species \n"; =cut -sub quasiRF_distance{ -my $self = shift; -my $tree1 = $self; -my $tree2 = shift; -my $compare_field = shift; - - my $root1 = $tree1->get_root(); - my $root2 = $tree2->get_root(); - - my $distance = 0.0; - - # get the implicit names or species for each node in both trees - # - if (lc $compare_field eq "species") { -#print STDOUT "top of quasiRF... compare_field eq species branch. \n"; - $root1->recursive_implicit_species(); - $root2->recursive_implicit_species(); - - my %n_bl_2 = (); # set up the hash for tree2 nodes, with species as key (value unused) - my $nhr2 = $tree2->{node_hash}; - foreach my $n2 (values ( %$nhr2)) { - my $implicit_species = join("\t", @{$n2->get_implicit_species()}); - # print STDOUT "Y stree implicit species: $implicit_species
\n"; - $n_bl_2{$implicit_species}++; # values are not used, just count occurrences - } - - my $nhr1 = $tree1->{node_hash}; - foreach my $n1 (values ( %$nhr1)) { - my $implicit_species = join("\t", @{$n1->get_implicit_species()}); - # print STDOUT "otree implicit species: $implicit_species
\n"; - if (exists $n_bl_2{$implicit_species}) { # there are subtrees with this set of leaves in both trees, do nothing - $n1->set_attribute("subtree_leaves_match", "true"); - # print STDOUT "true
\n"; - } else { # no node with this implicit name in tree2, so add branch length to total - $distance += $n1->get_branch_length(); - $n1->set_attribute("subtree_leaves_match", "false"); - } - } - } else { - $root1->recursive_implicit_names(); - $root2->recursive_implicit_names(); - - # set up the hash for tree2 nodes, with name as key (value unused) - my %n_bl_2 = (); - my $nhr2 = $tree2->{node_hash}; - foreach my $n2 (values ( %$nhr2)) { - $n_bl_2{$n2->get_name()}++; # values are not used, just count occurrences of the name - } - - my $nhr1 = $tree1->{node_hash}; - foreach my $n1 (values ( %$nhr1)) { - if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees, do nothing - $n1->set_attribute("subtree_leaves_match", "true"); - } else { # no node with this implicit name in tree2, so add branch length to total - $distance += $n1->get_branch_length(); - $n1->set_attribute("subtree_leaves_match", "false"); - } - } - } -my $distance2 = $root1->recursive_quasiRF_distance(); # this works on tree1 - which is not a copy here. -return $distance; # $tree1 has qRFd info at every node. -} - -sub RF_distance { - my $self = shift; - my $other_tree = shift; - my $compare_field = shift; # to control comparison of names (default) or species ("species") - # copy the trees into temporary trees, so that the trees can - # be manipulated (rerooted, collapsed) without changing the original trees. - # - # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n"; - my $tree1 = $self->copy(); - my $tree2 = $other_tree->copy(); +sub quasiRF_distance { + my $self = shift; + my $tree1 = $self; + my $tree2 = shift; + my $compare_field = shift; - # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2 + my $root1 = $tree1->get_root(); + my $root2 = $tree2->get_root(); - my $leaf1 = $tree1->get_root()->recursive_get_a_leaf(); - my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name()); + my $distance = 0.0; - if (!$corresponding_leaf) { - print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n"); - return 0; - } + # get the implicit names or species for each node in both trees + # + if ( lc $compare_field eq "species" ) { + + #print STDOUT "top of quasiRF... compare_field eq species branch. \n"; + $root1->recursive_implicit_species(); + $root2->recursive_implicit_species(); + + my %n_bl_2 = (); # set up the hash for tree2 nodes, with species as key (value unused) + my $nhr2 = $tree2->{node_hash}; + foreach my $n2 ( values(%$nhr2) ) { + my $implicit_species = join( "\t", @{ $n2->get_implicit_species() } ); + + # print STDOUT "Y stree implicit species: $implicit_species
\n"; + $n_bl_2{$implicit_species}++; # values are not used, just count occurrences + } + + my $nhr1 = $tree1->{node_hash}; + foreach my $n1 ( values(%$nhr1) ) { + my $implicit_species = join( "\t", @{ $n1->get_implicit_species() } ); + + # print STDOUT "otree implicit species: $implicit_species
\n"; + if ( exists $n_bl_2{$implicit_species} ) + { # there are subtrees with this set of leaves in both trees, do nothing + $n1->set_attribute( "subtree_leaves_match", "true" ); + + # print STDOUT "true
\n"; + } else { # no node with this implicit name in tree2, so add branch length to total + $distance += $n1->get_branch_length(); + $n1->set_attribute( "subtree_leaves_match", "false" ); + } + } + } else { + $root1->recursive_implicit_names(); + $root2->recursive_implicit_names(); + + # set up the hash for tree2 nodes, with name as key (value unused) + my %n_bl_2 = (); + my $nhr2 = $tree2->{node_hash}; + foreach my $n2 ( values(%$nhr2) ) { + $n_bl_2{ $n2->get_name() }++; # values are not used, just count occurrences of the name + } + + my $nhr1 = $tree1->{node_hash}; + foreach my $n1 ( values(%$nhr1) ) { + if ( exists $n_bl_2{ $n1->get_name() } ) + { # there are subtrees with this set of leaves in both trees, do nothing + $n1->set_attribute( "subtree_leaves_match", "true" ); + } else { # no node with this implicit name in tree2, so add branch length to total + $distance += $n1->get_branch_length(); + $n1->set_attribute( "subtree_leaves_match", "false" ); + } + } + } + my $distance2 = $root1->recursive_quasiRF_distance(); # this works on tree1 - which is not a copy here. + return $distance; # $tree1 has qRFd info at every node. +} + +sub RF_distance { + my $self = shift; + my $other_tree = shift; + my $compare_field = shift; # to control comparison of names (default) or species ("species") + # copy the trees into temporary trees, so that the trees can + # be manipulated (rerooted, collapsed) without changing the original trees. + # + # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n"; + my $tree1 = $self->copy(); + my $tree2 = $other_tree->copy(); + + # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2 + + my $leaf1 = $tree1->get_root()->recursive_get_a_leaf(); + my $corresponding_leaf = $tree2->get_node_by_name( $leaf1->get_name() ); + + if ( !$corresponding_leaf ) { + print( "in compare_unrooted. leaf1 name: ", + $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n" ); + return 0; + } - # reset roots of trees to the two corresponding leaves: - $tree1->reset_root($leaf1); - $tree2->reset_root($corresponding_leaf); + # reset roots of trees to the two corresponding leaves: + $tree1->reset_root($leaf1); + $tree2->reset_root($corresponding_leaf); - return $tree1->RF_distance_inner($tree2, $compare_field); + return $tree1->RF_distance_inner( $tree2, $compare_field ); } =head2 function RF_distance_inner @@ -2098,9 +2222,8 @@ sub RF_distance { Arguments: A tree object; and optionally a string specifying whether to compare node name or species. (Default is name) - Returns: Compares tree1 and tree2. If they are topologically - the same, 0 is returned. Otherwise returns a "distance" - describing how different the two trees are. + Returns: Compares tree1 and tree2. Returns list of 3 measures of distance: + robinson-foulds distance, symmetric disance and Side effects: Sets "subtree_leaves_match" field for each node Description: Tree1, tree2 should be collapsed before calling this function. For each node in tree1 add branch length to @@ -2115,429 +2238,585 @@ sub RF_distance { =cut -sub RF_distance_inner{ - my $self = shift; - my $tree1 = $self; - my $tree2 = shift; - my $compare_field = shift; - - my $root1 = $tree1->get_root(); - my $root2 = $tree2->get_root(); - - my $sym_diff = 0; #symmetric difference, just one for each partition in only one tree - my $distance = 0.0; - my $in_both_sum = 0.0; - my $in_one_only_sum = 0.0; - my $branch_score = 0.0; - - # get the implicit names or species for each node in both trees - # - if (lc $compare_field eq "species") { - # die "RF_distance with compare_field set to species is not implemented. \n"; - #print STDOUT "top of quasiRF... compare_field eq species branch. \n"; - $root1->recursive_implicit_species(); - $root2->recursive_implicit_species(); - unless(join("\t", $root1->get_implicit_species()) eq join("\t", $root1->get_implicit_species())){ - print STDERR "In RFdistance; trees do not have same set of leaves (by species).\n"; - return undef; - } - # set up the hash for tree nodes, with species as key, node obj as value - my %n_bl_1 = (); - my @nhr1 = $root1->recursive_subtree_node_list; #->{node_hash}; - - foreach my $n1 (@nhr1) { #all tree1 nodes except root1 - $n_bl_1{$n1->get_species()} = $n1; - } - my %n_bl_2 = (); - my @nhr2 = $root2->recursive_subtree_node_list; #$tree2->{node_hash}; - foreach my $n2 (@nhr2) { #all tree2 nodes except root2 - $n_bl_2{$n2->get_species()} = $n2; - } - - # my $in_both_sum = 0.0; - # my $in_one_only_sum = 0.0; - foreach my $n1 (@nhr1) { - if (exists $n_bl_2{$n1->get_species()}) { # there are subtrees with this set of leaves in both trees - my $n2 = $n_bl_2{$n1->get_species()}; - $in_both_sum += abs($n1->get_branch_length() - $n2->get_branch_length()); - } else { # no node with this implicit species in tree2, so add branch length to total - $in_one_only_sum += $n1->get_branch_length(); - $sym_diff++; - } - } - # my $in_both_sum2 = 0.0; - foreach my $n2 (@nhr2) { - if (exists $n_bl_1{$n2->get_species()}) { # there are subtrees with this set of leaves in both trees - # my $n1 = $n_bl_1{$n2->get_species()}; - # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length()); - } else { # no node with this implicit species in tree2, so add branch length to total - $in_one_only_sum += $n2->get_branch_length(); - $sym_diff++; - } - } - # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, "\n"); - # $distance = $in_both_sum + $in_one_only_sum; - # print "distance: ", $distance, "\n"; - - - } else { # compare field is "name" -# print "comparing trees by name fields \n"; - $root1->recursive_implicit_names(); - $root2->recursive_implicit_names(); - unless(join("\t", $root1->get_name()) eq join("\t", $root1->get_name())){ - print STDERR "In RFdistance; trees do not have same set of leaves (by name).\n"; - return undef; - } - # set up the hash for tree nodes, with name as key, node obj as value - my %n_bl_1 = (); - my @nhr1 = $root1->recursive_subtree_node_list(); #->{node_hash}; - - foreach my $n1 (@nhr1) { #all tree1 nodes except root1 -# print "n1 name: ", $n1->get_name(), "\n"; - $n_bl_1{$n1->get_name()} = $n1; - } - my %n_bl_2 = (); - my @nhr2 = $root2->recursive_subtree_node_list(); #$tree2->{node_hash}; - foreach my $n2 (@nhr2) { #all tree2 nodes except root2 -# print "n2 name: ", $n2->get_name(), "\n"; - $n_bl_2{$n2->get_name()} = $n2; - } - - # my $in_both_sum = 0.0; - # my $in_one_only_sum = 0.0; - my $diff = 0.0; - # foreach my $n1 (@nhr1) { - foreach my $name1 (keys %n_bl_1){ - my $n1 = $n_bl_1{$name1}; - if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees - my $n2 = $n_bl_2{$n1->get_name()}; - $diff = $n1->get_branch_length() - $n2->get_branch_length(); - $in_both_sum += abs($diff); # $n1->get_branch_length() - $n2->get_branch_length()); - # $branch_score += $diff*$diff; - } else { # no node with this implicit name in tree2, so add branch length to total - $diff = $n1->get_branch_length(); - $in_one_only_sum += $diff; # $n1->get_branch_length(); - # $branch_score += $diff*$diff; - $sym_diff++; -# print "name not present in hash 2: ", $n1->get_name(), "\n"; - } - $branch_score += $diff*$diff; - } - # my $in_both_sum2 = 0.0; - # foreach my $n2 (@nhr2) { - foreach my $name2 (keys %n_bl_2){ - my $n2 = $n_bl_2{$name2}; - if (exists $n_bl_1{$n2->get_name()}) { # there are subtrees with this set of leaves in both trees - # my $n1 = $n_bl_1{$n2->get_name()}; - # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length()); - } else { # no node with this implicit name in tree2, so add branch length to total - $in_one_only_sum += $n2->get_branch_length(); - $sym_diff++; - # print "name not present in hash 1: ", $n2->get_name(), "\n"; - } - } - } - - $distance = $in_both_sum + $in_one_only_sum; +sub RF_distance_inner { + my $self = shift; + my $tree1 = $self; + my $tree2 = shift; + my $compare_field = shift; + + my $root1 = $tree1->get_root(); + my $root2 = $tree2->get_root(); + + my $sym_diff = 0; #symmetric difference, just one for each partition in only one tree + my $distance = 0.0; + my $in_both_sum = 0.0; + my $in_one_only_sum = 0.0; + my $branch_score = 0.0; + + # get the implicit names or species for each node in both trees + # + if ( lc $compare_field eq "species" ) { + + # die "RF_distance with compare_field set to species is not implemented. \n"; + #print STDOUT "top of quasiRF... compare_field eq species branch. \n"; + $root1->recursive_implicit_species(); + $root2->recursive_implicit_species(); + unless ( join( "\t", $root1->get_implicit_species() ) eq join( "\t", $root1->get_implicit_species() ) ) { + print STDERR "In RFdistance; trees do not have same set of leaves (by species).\n"; + return undef; + } + + # set up the hash for tree nodes, with species as key, node obj as value + my %n_bl_1 = (); + my @nhr1 = $root1->recursive_subtree_node_list; #->{node_hash}; + + foreach my $n1 (@nhr1) { #all tree1 nodes except root1 + $n_bl_1{ $n1->get_species() } = $n1; + } + my %n_bl_2 = (); + my @nhr2 = $root2->recursive_subtree_node_list; #$tree2->{node_hash}; + foreach my $n2 (@nhr2) { #all tree2 nodes except root2 + $n_bl_2{ $n2->get_species() } = $n2; + } + + # my $in_both_sum = 0.0; + # my $in_one_only_sum = 0.0; + foreach my $n1 (@nhr1) { + if ( exists $n_bl_2{ $n1->get_species() } ) { # there are subtrees with this set of leaves in both trees + my $n2 = $n_bl_2{ $n1->get_species() }; + $in_both_sum += abs( $n1->get_branch_length() - $n2->get_branch_length() ); + } else { # no node with this implicit species in tree2, so add branch length to total + $in_one_only_sum += $n1->get_branch_length(); + $sym_diff++; + } + } + + # my $in_both_sum2 = 0.0; + foreach my $n2 (@nhr2) { + if ( exists $n_bl_1{ $n2->get_species() } ) { # there are subtrees with this set of leaves in both trees + # my $n1 = $n_bl_1{$n2->get_species()}; + # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length()); + } else { # no node with this implicit species in tree2, so add branch length to total + $in_one_only_sum += $n2->get_branch_length(); + $sym_diff++; + } + } + + # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, "\n"); + # $distance = $in_both_sum + $in_one_only_sum; + # print "distance: ", $distance, "\n"; + + } else { # compare field is "name" + + # print "comparing trees by name fields \n"; + $root1->recursive_implicit_names(); + $root2->recursive_implicit_names(); + unless ( join( "\t", $root1->get_name() ) eq join( "\t", $root1->get_name() ) ) { + print STDERR "In RFdistance; trees do not have same set of leaves (by name).\n"; + return undef; + } + + # set up the hash for tree nodes, with name as key, node obj as value + my %n_bl_1 = (); + my @nhr1 = $root1->recursive_subtree_node_list(); #->{node_hash}; + + foreach my $n1 (@nhr1) { #all tree1 nodes except root1 + + # print "n1 name: ", $n1->get_name(), "\n"; + $n_bl_1{ $n1->get_name() } = $n1; + } + my %n_bl_2 = (); + my @nhr2 = $root2->recursive_subtree_node_list(); #$tree2->{node_hash}; + foreach my $n2 (@nhr2) { #all tree2 nodes except root2 + + # print "n2 name: ", $n2->get_name(), "\n"; + $n_bl_2{ $n2->get_name() } = $n2; + } + + # my $in_both_sum = 0.0; + # my $in_one_only_sum = 0.0; + my $diff = 0.0; + + # foreach my $n1 (@nhr1) { + foreach my $name1 ( keys %n_bl_1 ) { + my $n1 = $n_bl_1{$name1}; + if ( exists $n_bl_2{ $n1->get_name() } ) { # there are subtrees with this set of leaves in both trees + my $n2 = $n_bl_2{ $n1->get_name() }; + $diff = $n1->get_branch_length() - $n2->get_branch_length(); + $in_both_sum += abs($diff); # $n1->get_branch_length() - $n2->get_branch_length()); + # $branch_score += $diff*$diff; + } else { # no node with this implicit name in tree2, so add branch length to total + $diff = $n1->get_branch_length(); + $in_one_only_sum += $diff; # $n1->get_branch_length(); + # $branch_score += $diff*$diff; + $sym_diff++; + + # print "name not present in hash 2: ", $n1->get_name(), "\n"; + } + $branch_score += $diff * $diff; + } + + # my $in_both_sum2 = 0.0; + # foreach my $n2 (@nhr2) { + foreach my $name2 ( keys %n_bl_2 ) { + my $n2 = $n_bl_2{$name2}; + if ( exists $n_bl_1{ $n2->get_name() } ) { # there are subtrees with this set of leaves in both trees + # my $n1 = $n_bl_1{$n2->get_name()}; + # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length()); + } else { # no node with this implicit name in tree2, so add branch length to total + $in_one_only_sum += $n2->get_branch_length(); + $sym_diff++; + + # print "name not present in hash 1: ", $n2->get_name(), "\n"; + } + } + } + + $distance = $in_both_sum + $in_one_only_sum; + # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, " RFdistance: ", $distance, "\n"); - return ($distance, $sym_diff, $branch_score); + return ( $distance, $sym_diff, $branch_score ); } -sub get_branch_length_sum{ - my $self = shift; - my @nodelist = $self->get_root()->recursive_subtree_node_list; - my $bl_sum = 0.0; - foreach (@nodelist) { - $bl_sum += $_->get_branch_length(); - } - return $bl_sum; +sub get_branch_length_sum { + my $self = shift; + my @nodelist = $self->get_root()->recursive_subtree_node_list; + my $bl_sum = 0.0; + foreach (@nodelist) { + $bl_sum += $_->get_branch_length(); + } + return $bl_sum; } -sub get_branch_length_sum_noterm{ # sum of all non-terminal branch lengths - my $self = shift; - my @nodelist = $self->get_root()->recursive_subtree_node_list; - my $bl_sum = 0.0; - foreach (@nodelist) { - next if($->is_leaf()); - $bl_sum += $_->get_branch_length(); - } - return $bl_sum; +sub get_branch_length_sum_noterm { # sum of all non-terminal branch lengths + my $self = shift; + my @nodelist = $self->get_root()->recursive_subtree_node_list; + my $bl_sum = 0.0; + foreach (@nodelist) { + next if ( $- > is_leaf() ); + $bl_sum += $_->get_branch_length(); + } + return $bl_sum; } -sub multiply_branch_lengths_by{ - my $self = shift; - my $factor = shift; - my @nodelist = $self->get_root()->recursive_subtree_node_list; - foreach (@nodelist) { - $_->set_branch_length($_->get_branch_length()*$factor); - } +sub multiply_branch_lengths_by { + my $self = shift; + my $factor = shift; + my @nodelist = $self->get_root()->recursive_subtree_node_list; + foreach (@nodelist) { + $_->set_branch_length( $_->get_branch_length() * $factor ); + } } #scale branch lengths s.t. their sum is #desired_bl_sum (1.0 by default) # returns original bl sum -sub normalize_branch_length_sum{ - my $self = shift; - my $desired_bl_sum = shift; - $desired_bl_sum ||= 1.0; - my $bl_sum = $self->get_branch_length_sum(); - if ($bl_sum <= 0.0) { - print STDERR "Can\'t normalize branch length sum, sum is $bl_sum; <= zero. \n"; - } else { - $self->multiply_branch_lengths_by($desired_bl_sum/$bl_sum); - } - return $bl_sum; -} - -sub RFdist_over_totbl{ # this is (weighted, i.e. using branch lengths) RF distance, normalized by sum of all -# branch lengths in both trees, so it will lie in range [0,1] - my $self = shift; - my $tree1 = $self; - my $tree2 = shift; - my $compare_field = shift; - my $normalize_bl_sums = shift; - $normalize_bl_sums = 0 unless(defined $normalize_bl_sums); - - if ($normalize_bl_sums) { - $tree1->normalize_branch_length_sum(); - $tree2->normalize_branch_length_sum(); - } - my $bl_sum = $tree1->get_branch_length_sum() + $tree2->get_branch_length_sum(); - #print "bl_sum: $bl_sum \n"; - my ($rfd, $symdiff, $branch_score) = $tree1->RF_distance($tree2, $compare_field); -# print "bl_sum: $bl_sum . rfd: $rfd \n"; - return $rfd/$bl_sum; +sub normalize_branch_length_sum { + my $self = shift; + my $desired_bl_sum = shift; + $desired_bl_sum ||= 1.0; + my $bl_sum = $self->get_branch_length_sum(); + if ( $bl_sum <= 0.0 ) { + print STDERR "Can\'t normalize branch length sum, sum is $bl_sum; <= zero. \n"; + } else { + $self->multiply_branch_lengths_by( $desired_bl_sum / $bl_sum ); + } + return $bl_sum; +} + +sub RFdist_over_totbl { # this is (weighted, i.e. using branch lengths) RF distance, normalized by sum of all + + # branch lengths in both trees, so it will lie in range [0,1] + my $self = shift; + my $tree1 = $self; + my $tree2 = shift; + my $compare_field = shift; + my $normalize_bl_sums = shift; + $normalize_bl_sums = 0 unless ( defined $normalize_bl_sums ); + + if ($normalize_bl_sums) { + $tree1->normalize_branch_length_sum(); + $tree2->normalize_branch_length_sum(); + } + my $bl_sum = $tree1->get_branch_length_sum() + $tree2->get_branch_length_sum(); + + #print "bl_sum: $bl_sum \n"; + my ( $rfd, $symdiff, $branch_score ) = $tree1->RF_distance( $tree2, $compare_field ); + + # print "bl_sum: $bl_sum . rfd: $rfd \n"; + return $rfd / $bl_sum; } # divide into trees no bigger than $max_leaves -sub divide_into_small_trees{ - my $self = shift; - my $max_leaves = shift; - $max_leaves ||= 100; - # print "in Tree::divide_into_small_trees. ", $self->get_root()->get_attribute("leaf_count"), "\n\n"; - my $small_trees_array = $self->get_root()->recursive_divide_subtree_into_small_trees($max_leaves); - return $small_trees_array; +sub divide_into_small_trees { + my $self = shift; + my $max_leaves = shift; + $max_leaves ||= 100; + + # print "in Tree::divide_into_small_trees. ", $self->get_root()->get_attribute("leaf_count"), "\n\n"; + my $small_trees_array = $self->get_root()->recursive_divide_subtree_into_small_trees($max_leaves); + return $small_trees_array; } # get list of subtrees containing ortholog group candidate subtrees # (trees with > 1 leaf, and distinct species in all leaves) # the argument allows one to specify to go up some number of parent # nodes above the nodes with the ortholog group candidate subtrees. -sub get_ortholog_group_candidate_subtrees{ - my $self = shift; - my $desired_levels_above = shift; - $desired_levels_above = 0 unless($desired_levels_above > 0); -# print "tree. levels_above: ", $desired_levels_above, "\n"; - my $ortholog_group_candidate_subtrees_array = []; - $self->get_root()->recursive_set_levels_above_distinct_species_subtree(); - $self->get_root()->recursive_find_ortholog_group_candidate_subtrees($ortholog_group_candidate_subtrees_array, $desired_levels_above); - return $ortholog_group_candidate_subtrees_array; +sub get_ortholog_group_candidate_subtrees { + my $self = shift; + my $desired_levels_above = shift; + $desired_levels_above = 0 unless ( $desired_levels_above > 0 ); + + # print "tree. levels_above: ", $desired_levels_above, "\n"; + my $ortholog_group_candidate_subtrees_array = []; + $self->get_root()->recursive_set_levels_above_distinct_species_subtree(); + $self->get_root()->recursive_find_ortholog_group_candidate_subtrees( $ortholog_group_candidate_subtrees_array, + $desired_levels_above ); + return $ortholog_group_candidate_subtrees_array; +} + +sub make_names_urec_ok { + my $self = shift; + my @nodes = $self->get_all_nodes(); + foreach my $a_node (@nodes) { + my $name = $a_node->get_name(); + if ( $name =~ /^[a-zA-Z]/ ) { # ok - leave as is. + } else { # add X_ in front to make urec happy + $a_node->set_name("X_$name"); + } + } } - -sub make_names_urec_ok{ - my $self = shift; - my @nodes = $self->get_all_nodes(); - foreach my $a_node (@nodes){ - my $name = $a_node->get_name(); - if($name =~ /^[a-zA-Z]/){ # ok - leave as is. - }else{ # add X_ in front to make urec happy - $a_node->set_name("X_$name"); +sub leaf_species_bit_pattern_string { + my $self = shift; + my @leaves = $self->get_leaf_list(); + my $str = ''; + foreach (@leaves) { + $str .= $_->get_name() . " [" . $_->get_attribute("species_bit_pattern") . "]\n"; } - } + return $str; } - # using urec, find the node s.t. rooting on its branch gives minimal duplications and losses - # w.r.t. a species tree -sub find_mindl_node{ - my $gene_tree = shift; # a rooted gene tree - my $species_t = shift; # a species tree -#print "in find_mindl_node. species_tree newick: \n", $species_t->subtree_newick(), " \n"; - -# print STDERR "##################### Top of find_mindl_node. #############\n"; - # urec requires binary tree - make sure the tree is binary - # if polytomy at root, reroot a bit down one branch, to get binary root (if was tritomy) - my @new_root_point; - { - my @root_children = $gene_tree->get_root()->get_children(); - if (scalar @root_children != 2) { - @new_root_point = ($root_children[0], 0.9*$root_children[0]->get_branch_length()); - $gene_tree->reset_root_to_point_on_branch(@new_root_point); - } - } - # binarify every non-binary node. At present doesn't attempt to choose in a smart way - # among the various possible resolutions - $gene_tree->make_binary($gene_tree->get_root()); # urec requires binary tree. - - my $store_show_std_species = $gene_tree->get_show_standard_species(); - # put the trees into form of newick strings with no whitespace, so urec will be happy - $gene_tree->show_newick_attribute("species"); - $gene_tree->set_show_standard_species(1); - # make sure node names start with alphabetic char to make urec happy. - $gene_tree->make_names_urec_ok(); - # need to redo node implicit names here. - my $gene_newick_string = $gene_tree->generate_newick(); -# print "binarified gene tree (urec input): ", $gene_newick_string, "\n"; - $gene_newick_string =~ s/\s//g; - - $species_t->show_newick_attribute("species"); - $species_t->set_show_standard_species(1); -my @thenodes = $species_t->get_leaves(); - foreach my $a_node (@thenodes){ -# print "species,stdspecies: ", $a_node->get_species(), " ", $a_node->get_standard_species(), "\n"; - } -#print "species tree (from subtree_newick): ", $species_t->subtree_newick(), "\n"; - my $species_newick_string = $species_t->generate_newick(); - $species_newick_string =~ s/\s//g; # remove whitespace -#print "species tree newick string: $species_newick_string\n"; -#print "gene newick string: $gene_newick_string \n\n"; -#print "about to call urec. \n"; -# print STDERR "about to call urec, with gene tree newick:\n$gene_newick_string \n$species_newick_string \n"; +# using urec, find the node s.t. rooting on its branch gives minimal duplications and losses +# w.r.t. a species tree +sub find_mindl_node { + my $gene_tree = shift; # a rooted gene tree + my $species_tree = shift; # a species tree +# print STDERR "0 BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $gene_tree->get_leaf_list() ) ), "\n"; + + # for($gene_tree->get_leaf_list()){ print STDERR "genetree sbts: ", $_->get_attribute('species_bit_pattern'), "\n"; } + # print STDERR "gtnewick: ", $gene_tree->generate_newick(), "\n"; + + my $gene_tree_copy = $gene_tree->copy(); + # print STDERR "1 BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $gene_tree_copy->get_leaf_list() ) ), +# "\n"; + +# for($gene_tree->get_leaf_list()){ +# print STDERR $_->get_name(), " ", $_->get_species(), " ", +# $_->get_attribute('species_bit_pattern'), "\n"; +# } +# print STDERR "gtcopynewick before pruning: ", $gene_tree_copy->generate_newick(), "\n"; + + my @non_speciestree_leafnodes = values %{ $gene_tree_copy->non_speciestree_leafnode_names() }; +# print STDERR "nonspeciestreeleafnodes: \n", join("\n", map($_->get_name() . "--" . $_->get_species(), @non_speciestree_leafnodes)), "\n"; + + $gene_tree_copy->prune_leaves(@non_speciestree_leafnodes); + my $n_leaves_after_pruning = scalar $gene_tree_copy->get_leaf_list(); + if ( $n_leaves_after_pruning < 2 ) { + warn "Problem with mindl rerooting. Only: ", $n_leaves_after_pruning, " leaves after pruning.\n Tree: ", + $gene_tree_copy->generate_newick(), "\n"; + return ( undef, undef ); + } + # print STDERR "gt newick after pruning: ", $gene_tree_copy->generate_newick(), "\n"; + # urec requires binary tree - make sure the tree is binary + # if polytomy at root, reroot a bit down one branch, to get binary root (if was tritomy) + my @new_root_point; + + # print STDERR "pruned copy newick: ", $gene_tree_copy->generate_newick(), "\n"; + { + my @root_children = $gene_tree_copy->get_root()->get_children(); + if ( scalar @root_children != 2 ) { + + @new_root_point = ( $root_children[0], 0.9 * $root_children[0]->get_branch_length() ); + $gene_tree_copy->reset_root_to_point_on_branch(@new_root_point); + } + } + + # binarify every non-binary node. At present doesn't attempt to choose in a smart way + # among the various possible resolutions + $gene_tree_copy->make_binary( $gene_tree_copy->get_root() ); # urec requires binary tree. + + my $store_show_std_species = $gene_tree_copy->get_show_standard_species(); + + # put the trees into form of newick strings with no whitespace, so urec will be happy + $gene_tree_copy->show_newick_attribute("species"); + $gene_tree_copy->set_show_standard_species(1); + + # make sure node names start with alphabetic char to make urec happy. + $gene_tree_copy->make_names_urec_ok(); + + # need to redo node implicit names here. + my $gene_newick_string = $gene_tree_copy->generate_newick(); + $gene_newick_string =~ s/\s//g; + + $species_tree->show_newick_attribute("species"); + $species_tree->set_show_standard_species(1); + + my $species_newick_string = $species_tree->generate_newick(); + $species_newick_string =~ s/\s//g; # remove whitespace + + # print STDERR "gene tree: $gene_newick_string \n"; + # print STDERR "species tree: $species_newick_string \n"; + # my $rerooted_newick = `/home/tomfy/cxgn/cxgn-corelibs/lib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`; -# my $rerooted_newick = `/data/local/cxgn/core/perllib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`; - - my $rerooted_newick; -# my $urec_dir = `which urec`; -# if($urec_dir =~ /\S/){ - if(`which urec` =~ /\S/){ - $rerooted_newick = `urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`; -}else{ - $rerooted_newick = `/data/local/cxgn-old/core/perllib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`; -} - - # print STDERR "gene_newick_string: \n $gene_newick_string \n\nspecies_newick_string: \n $species_newick_string.\n\n"; -# print STDERR "Rerooted newick string: [$rerooted_newick].\n"; - -# print "parsing mindl rerooted gene tree in Tree\n"; - my $minDL_rerooted_gene_tree = (CXGN::Phylo::Parse_newick->new($rerooted_newick, $do_parse_set_error))->parse(); # this is now rooted so as to minimize gene duplication and loss needed to reconcile with species tree, - # but branch lengthswill be wrong for nodes whose parent has changed in the rerooting (they are just the branch lengths to the old parents). - $minDL_rerooted_gene_tree->get_root()->recursive_implicit_names(); - - # $minDL_rerooted_gene_tree should have 2 children and (at least) one should have it's subtree also present in the pre-rerooting tree. - # identify the node at the root of this subtree (using implicit names) and reroot there. - # Have to do this because some branch length info was lost in urec step. -# warn "minDL_rerooted_gene_tree newick: \n", $minDL_rerooted_gene_tree->generate_newick(), "\n"; - my @root_children = $minDL_rerooted_gene_tree->get_root()->get_children(); - my ($node_key, $rr_node); - foreach (@root_children) { - my $implicit_name_string = join("\t", @{$_->get_implicit_names()}); - # warn "implicit name string: $implicit_name_string \n"; - ($node_key, $rr_node) = $gene_tree->node_from_implicit_name_string($implicit_name_string); - if (defined $rr_node) { - $minDL_rerooted_gene_tree->decircularize(); - # debug ("Reroot above this node: $implicit_name_string \n"); -# warn "branch length along which to reroot: ", $rr_node->get_branch_length(), "\n"; - return @new_root_point = ($rr_node, 0.5*($rr_node->get_branch_length())); - } - } -$| = 1; - - warn "problem with mindl rerooting. minDL_rerooted_gene_tree newick: \n", - $minDL_rerooted_gene_tree->generate_newick(), "\n"; - die "find_mindl_node failed. \n"; - -# $gene_tree->set_shown_standard_species($store_show_standard_species); -#$gene_tree->update_label_names(); - return (undef, undef); -} - -sub get_species_bithash{ #get a hash giving a bit pattern for each species in both $gene_tree and $spec_tree - my $gene_tree = shift; - my $spec_tree = shift; - my $bithash = {}; - my %genehash; - my %spechash; - $spec_tree->show_newick_attribute("species"); - my $stree_newick = $spec_tree->generate_newick(); -# print STDERR "SPECIES TREE: $stree_newick \n"; - my @leaf_list = $gene_tree->get_leaf_list(); - foreach (@leaf_list) { - my $lspecies = $_->get_standard_species(); - print STDERR "In Tree::get_species_bithash; gtree species: $lspecies \n"; - $genehash{$lspecies}++; # keys are species in gene tree, values are number of leaves with that species - } - @leaf_list = $spec_tree->get_leaf_list(); - foreach (@leaf_list) { - my $lspecies = $_->get_standard_species(); - # print STDERR "stree species, raw, std: ", $_->get_standard_species(), " $lspecies \n"; - if ($genehash{$lspecies}) { - $spechash{$lspecies}++; # keys are species in both trees. - } - } - my @species_list = sort (keys %spechash); - # print join(" ", @species_list), "\n"; - my $bits = 1; - foreach (@species_list) { - $bithash->{$_} = $bits; - $bits = $bits << 1; # mult by two - # print "$_, $bits \n"; - } - - return $bithash; -} - -sub prune_non{ # prune leaves from tree 1 if their species does not occur in tree2 + + my $urec_cmd = '/data/prod/bin/urec'; + $urec_cmd = 'urec' if ( !-x $urec_cmd ); + + # print STDERR `which $urec_cmd`; + my $rerooted_newick = `$urec_cmd -s "$species_newick_string" -g "$gene_newick_string" -b -O`; + # print STDERR "in mindl rerooted newick: [$rerooted_newick] \n"; + my $minDL_rerooted_gene_tree = ( CXGN::Phylo::Parse_newick->new( $rerooted_newick, $do_parse_set_error ) )->parse(); + +# $minDL_rerooted_gene_tree is now rooted so as to minimize gene duplication and loss needed to reconcile with species tree, +# but branch lengths will be wrong for nodes whose parent has changed in the rerooting (they are just the branch lengths to the old parents). + $minDL_rerooted_gene_tree->get_root()->recursive_implicit_names(); + + # the root of $minDL_rerooted_gene_tree should have 2 children + # and (at least) one hould have it's subtree also present in the pre-rerooting tree. + # identify the node at the root of this subtree (using implicit names) and reroot there. + # Have to do this because some branch length info was lost in urec step. + + my @root_children = $minDL_rerooted_gene_tree->get_root()->get_children(); + my ( $node_key, $rr_node ); + foreach (@root_children) { + my $implicit_name_string = join( "\t", @{ $_->get_implicit_names() } ); + ( $node_key, $rr_node ) = $gene_tree_copy->node_from_implicit_name_string($implicit_name_string); + if ( defined $rr_node ) { + $minDL_rerooted_gene_tree->decircularize(); + $gene_tree_copy->decircularize(); + my $rr_node_in_orig_gene_tree = $gene_tree->get_node($node_key); + return @new_root_point = + ( $rr_node_in_orig_gene_tree, 0.5 * ( $rr_node_in_orig_gene_tree->get_branch_length() ) ); + + } + } + $| = 1; + + warn "problem with mindl rerooting. minDL_rerooted_gene_tree newick: \n", + $minDL_rerooted_gene_tree->generate_newick(), "\n"; + die "find_mindl_node failed. \n"; + + return ( undef, undef ); +} + +sub get_species_bithash { #get a hash giving a bit pattern for each species in BOTH $gene_tree and $spec_tree + my $gene_tree = shift; + my $spec_tree = shift; + my $bithash = {}; + my %genehash; + my %spechash; + $spec_tree->show_newick_attribute("species"); + my $stree_newick = $spec_tree->generate_newick(); + + # count number of gene tree leaves of each species (use standard species) + my @leaf_list = $gene_tree->get_leaf_list(); + foreach (@leaf_list) { + + # print STDERR $_->get_name(), " ", $_->get_species(), " ", $_->get_standard_species(), "\n"; + my $lspecies = $_->get_standard_species(); # gene tree leaf standard species + # print STDERR "In Tree::get_species_bithash; gtree species: $lspecies \n"; + $genehash{$lspecies}++; # keys are species in gene tree, values are number of leaves with that species + } + + # count number of species tree leaves of each species (use standard species) + @leaf_list = $spec_tree->get_leaf_list(); + foreach (@leaf_list) { + my $lspecies = $_->get_standard_species(); + + # print STDERR "stree species, raw, std: ", $_->get_standard_species(), " $lspecies \n"; + if ( $genehash{$lspecies} ) { + $spechash{$lspecies}++; # keys are species in both trees. + } + } + my @species_list = sort ( keys %spechash ); + + # assign 1,2,4,8, etc. to the various species (only the species in both gene and species trees) + my $bits = Math::BigInt->new(1); + foreach (@species_list) { + $bithash->{$_} = $bits; + $bits = $bits << 1; # mult by two + } + + return $bithash; +} + + +# returns 1 if can't delete one of the leaf nodes in arg list, 0 if OK. +sub prune_leaves { # prune leaves from tree 1 if their species does not occur in tree2 + my $self = shift; + my @leaves = @_; + + my $retval = 0; + foreach my $leaf_node (@leaves) { + if ( !$leaf_node ) { + warn 'The node you want to delete does not exist!'; + return; + } + my $parent_of_pruned_node = $leaf_node->get_parent(); + $retval ||= $leaf_node->delete_self(); + $parent_of_pruned_node->recursive_collapse_single_nodes(); # so if e.g. you delete two neighbor leaves, their parent is also deleted + } + + $self->collapse_tree(); +# print STDERR "Y BRANCHLENGTHS: ", join( ";", map( $_->get_branch_length(), $self->get_leaf_list() ) ), "\n"; + return $retval; +} + +# returns a ref to a hash whose keys are names of leaf nodes, values are refs to arrays of names of other +# leaf nodes inferred to be orthologs +sub leaf_ortholog_table{ +my $self = shift; +my $hilited_only = shift || undef; +my $query_species = shift || undef; +my $query_id_regex = shift || undef; +my $standard_query_species = undef; +if (defined $query_species) { + $standard_query_species = $self->get_species_standardizer()->get_standard_name($query_species); #e.g. "Ipomoea_batatas"; + } +my %leaf_ortholog_hash = (); + my @leaves = $self->get_leaves(); + my $non_species_tree_leaf_node_names = $self->non_speciestree_leafnode_names(); + foreach my $leaf (@leaves) { +# print STDERR $leaf->get_name(), " [", $leaf->get_label()->get_hilite(), "]\n"; + next if($hilited_only && !$leaf->get_label()->get_hilite()); + my $species_ok = (! defined $query_species or ( $leaf->get_standard_species() =~ /$standard_query_species/) ); + my $name_ok = (! defined $query_id_regex or $leaf->get_name() =~ /$query_id_regex/); + next if(!$species_ok or !$name_ok); + my $leafname = $leaf->get_name(); + + next if( exists $non_species_tree_leaf_node_names->{$leafname} ); + # $ortholog_str .= "orthologs of " . $leafname . ": "; + + my @cand_orthologs = $leaf->collect_orthologs_of_leaf(); + + # keep only leaves whose species appear in species tree + my @orthologs = (); + + # my $non_species_tree_leaf_node_names = + # $browser->get_tree()->non_speciestree_leafnode_names(); + if ( scalar keys %$non_species_tree_leaf_node_names > 0 ) { + foreach (@cand_orthologs) { + if ( exists $non_species_tree_leaf_node_names->{$_} ) { + # unknown species, can't claim orthology + } else { + push @orthologs, $_; + } + } + } else { + @orthologs = @cand_orthologs; + } + + # my @orthologs = $leaf->collect_orthologs_of_leaf(); # list of leaf names + # $ortholog_str .= join(" ", @orthologs) . "\n"; + $leaf_ortholog_hash{$leafname} = \@orthologs; + } # loop over leaves +return \%leaf_ortholog_hash; + } # return a hash whose keys are leaf node names (hidden nodes excluded) # and whose values are refs to arrays of 1's and 0's, the 1's indicating orthology. -sub ortho_matrix_hash{ - my $self = shift; - my @leaf_names = (); - for ($self->get_leaves()) { - next if($_->get_hide_label()); # do not include hidden labels - push @leaf_names, $_->get_name(); - } - @leaf_names = sort @leaf_names; -# print STDERR join(" ", @leaf_names), "\n"; - my $n_leaves = scalar @leaf_names; - my %name_hash; - my %ortho_hash; - - my $i = 0; - foreach (@leaf_names) { - $name_hash{$_} = $i; - my @zeroes = (0)x$n_leaves; - $ortho_hash{$_} = \@zeroes; - $i++; - } - my @leaves = $self->get_leaves(); - foreach (@leaves) { - my $name = $_->get_name(); - my $o_array = $ortho_hash{$name}; - # print STDERR join(" ", @$o_array), "\n"; - my @orthologs = $_->collect_orthologs_of_leaf(); - foreach (@orthologs) { - my $o_name = $_; #->get_name(); - # print STDERR $o_name, " ", $name_hash{$o_name}, "\n"; - $o_array->[$name_hash{$o_name}] = 1; # in the array for $name set the right element to 1 - } - } -# foreach (@leaf_names) { -# my $ortho_array_ref = $ortho_hash{$_}; -# printf STDERR ("%50s ", $_); print STDERR join(" ", @$ortho_array_ref), "\n"; -# } - return \%ortho_hash; +sub ortho_matrix_hash { + my $self = shift; + my @leaf_names = (); + for ( $self->get_leaves() ) { + next if ( $_->get_hide_label() ); # do not include hidden labels + push @leaf_names, $_->get_name(); + } + @leaf_names = sort @leaf_names; + + # print STDERR join(" ", @leaf_names), "\n"; + my $n_leaves = scalar @leaf_names; + my %name_hash; + my %ortho_hash; + + my $i = 0; + foreach (@leaf_names) { + $name_hash{$_} = $i; + my @zeroes = (0) x $n_leaves; + $ortho_hash{$_} = \@zeroes; + $i++; + } + my @leaves = $self->get_leaves(); + foreach (@leaves) { + my $name = $_->get_name(); + my $o_array = $ortho_hash{$name}; + + # print STDERR join(" ", @$o_array), "\n"; + my @orthologs = (); + my @cand_orthologs = $_->collect_orthologs_of_leaf(); + + # keep only leaves whose species appear in species tree + my $non_species_tree_leaf_node_names = $self->non_speciestree_leafnode_names(); + if ( scalar keys %$non_species_tree_leaf_node_names > 0 ) { + foreach (@cand_orthologs) { + next if ( exists $non_species_tree_leaf_node_names->{$_} ); + push @orthologs, $_; + } + } else { + @orthologs = @cand_orthologs; + } + foreach (@orthologs) { + my $o_name = $_; #->get_name(); + # print STDERR $o_name, " ", $name_hash{$o_name}, "\n"; + $o_array->[ $name_hash{$o_name} ] = 1; # in the array for $name set the right element to 1 + } + } + + # foreach (@leaf_names) { + # my $ortho_array_ref = $or tho_hash{$_}; + # printf STDERR ("%50s ", $_); print STDERR join(" ", @$ortho_array_ref), "\n"; + # } + return \%ortho_hash; +} + +# return a hashref whose keys are the names of the leaf nodes +# whose species don't appear in the species tree. +# keys: names, values: node objects +# must have already set the species bit patterns for each node, using +sub non_speciestree_leafnode_names { + my $self = shift; + my @leaves = $self->get_leaf_list(); + my %non_species_tree_leaf_node_names = (); + for (@leaves) { + if ( $_->get_attribute("species_bit_pattern") == 0 ) { + $non_species_tree_leaf_node_names{ $_->get_name() } = $_; # keys: names, values: node objects + } + } + return \%non_species_tree_leaf_node_names; } -sub set_branch_lengths_equal{ -my $self = shift; -my $bl = shift || 1.0; -$self->get_root()->recursive_set_branch_length($bl); +sub set_branch_lengths_equal { + my $self = shift; + my $bl = shift || 1.0; + $self->get_root()->recursive_set_branch_length($bl); } -sub decircularize{ -my $self = shift; -$self->get_root()->recursive_decircularize(); -$self->set_renderer(undef); -$self->set_layout(undef); +sub decircularize { + my $self = shift; + $self->get_root()->recursive_decircularize(); + $self->set_renderer(undef); + $self->set_layout(undef); } sub DESTROY { - my $self = shift; -# $self->decircularize(); + my $self = shift; + + # $self->decircularize(); # check for an overridden destructor... $self->SUPER::DESTROY if $self->can("SUPER::DESTROY"); + # now do your own thing before or after } diff --git a/t/CXGN/Phylo/orthologger.t b/t/CXGN/Phylo/orthologger.t new file mode 100644 index 00000000..efe15469 --- /dev/null +++ b/t/CXGN/Phylo/orthologger.t @@ -0,0 +1,153 @@ +#!/usr/bin/perl -w +use strict; +use warnings FATAL => 'all'; + +# tests for Orthologger Module/ +use Test::More tests => 10; + +#use lib '/home/tomfy/cxgn/cxgn-corelibs/lib'; +#use lib '/home/tomfy/Orthologger/lib'; + +use CXGN::Phylo::Parser; +use CXGN::Phylo::Orthologger; + + +# This is family 2830, with 25 genes in 13 taxa. +my $gene_tree_newick = +'(((Solyc03g121130.2.1[species=tomato]:0.03887,(((POPTR_0014s08830.1[species=poplar]:0.0001,POPTR_0014s08830.2[species=poplar]:0.0001):0.05550,(X_30131.m006857[species=castorbean]:0.01095,X_30131.m007044[species=castorbean]:0.00016):0.02176):0.04630,(evm.model.supercontig_184.11[species=papaya]:0.04198,(AT2G46230.1[species=arabidopsis]:0.01153,AT2G46230.2[species=arabidopsis]:0.07275):0.15150):0.02371):0.01878):0.01095,((GSVIVT01026915001[species=grape]:0.0001,GSVIVT01027024001[species=grape]:0.0001):0.07067,(((Glyma12g31740.1[species=soybean]:0.0001,Glyma13g38690.1[species=soybean]:0.0001):0.0001,(Glyma13g38690.2[species=soybean]:0.0001,Glyma13g38690.3[species=soybean]:0.0001):0.0001):0.01992,(IMGA_Medtr6g086290.1[species=medicago]:0.02191,IMGA_Medtr7g116720.1[species=medicago]:0.00015):0.06024):0.05419):0.01302):0.0148619097222222,((Bradi1g48740.1[species=brachypodium]:0.03551,(((Sb10g004030.1[species=sorghum]:0.0001,GRMZM2G140689_P02[species=maize]:0.0001):0.00015,(GRMZM2G016330_P01[species=maize]:0.0001,GRMZM2G016330_P03[species=maize]:0.0001):0.00536):0.02677,(LOC_Os06g06410.1[species=rice]:0.01662,LOC_Os06g06410.2[species=rice]:0.00015):0.00288):0.02333):0.08447,(jgi_Selmo1_91292[species=selaginella]:0.00532,jgi_Selmo1_158231[species=selaginella]:0.00016):0.13724):0.00183809027777778)'; + +# $gene_tree_newick =~ s/\s*//g; # remove whitespace + +my $species_tree_newick = +'(Selaginella[species=Selaginella]:1,(((sorghum[species=Sorghum_bicolor]:1,maize[species=Zea_mays]:1):1,(rice[species=Oryza_sativa]:1,brachypodium[species=Brachypodium_distachyon]:1):1):1,(tomato[species=Solanum_lycopersicum]:1,(grape[species=Vitis_vinifera]:1,((papaya[species=Carica_papaya]:1,arabidopsis[species=Arabidopsis_thaliana]:1):1,((soy[species=Glycine_max]:1,medicago[species=Medicago_truncatula]:1):1,(castorbean[species=Ricinus_communis]:1,Poplar[species=Populus_trichocarpa]:1):1):1):1):1):1):1)'; + +$species_tree_newick = +'( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella[species=Selaginella_moellendorffii]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1 ):1, ( columbine[species=Aquilegia_coerulea]:1, ( ( ( ( ( ( ( ( ( ( tomato[species=Solanum_lycopersicum]:1, potato[species=Solanum_tuberosum]:1 ):1, eggplant[species=Solanum_melongena]:1 ):1, pepper[species=Capsicum_annuum]:1 ):1, tobacco[species=Nicotiana_tabacum]:1 ):1, petunia[species=Petunia]:1 ):1, sweet_potato[species=Ipomoea_batatas]:1 ):1, ( arabica_coffee[species=Coffea_arabica]:1, robusta_coffee[species=Coffea_canephora]:1 ):1 ):1, snapdragon[species=Antirrhinum]:1 ):1, ( ( sunflower[species=Helianthus_annuus]:1, lettuce[species=Lactuca_sativa]:1 ):1, carrot[species=Daucus_carota]:1 ):1 ):1, ( grape[species=Vitis_vinifera]:1, ( ( eucalyptus[species=Eucalyptus_grandis]:1, ( ( orange[species=Citrus_sinensis]:1, clementine[species=Citrus_clementina]:1 ):1, ( ( cacao[species=Theobroma_cacao]:1, cotton[species=Gossypium_raimondii]:1 ):1, ( papaya[species=Carica_papaya]:1, ( turnip[species=Brassica_rapa]:1, ( salt_cress[species=Thellungiella_parvula]:1, ( red_shepherds_purse[species=Capsella_rubella]:1, ( arabidopsis_thaliana[species=Arabidopsis_thaliana]:1, arabidopsis_lyrata[species=Arabidopsis_lyrata]:1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1, ( ( ( peanut[species=Arachis_hypogaea]:1, ( ( soy[species=Glycine_max]:1, pigeon_pea[species=Cajanus_cajan]:1 ):1, ( medicago[species=Medicago_truncatula]:1, lotus[species=Lotus_japonicus]:1 ):1 ):1 ):1, ( hemp[species=Cannabis_sativa]:1, ( ( ( apple[species=Malus_domestica]:1, peach[species=Prunus_persica]:1 ):1, woodland_strawberry[species=Fragaria_vesca]:1 ):1, cucumber[species=Cucumis_sativus]:1 ):1 ):1 ):1, ( ( castorbean[species=Ricinus_communis]:1, cassava[species=Manihot_esculenta]:1 ):1, ( poplar[species=Populus_trichocarpa]:1, flax[species=Linum_usitatissimum]:1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ):1 ) :1'; + +# $species_tree_newick =~ s/\s*//g; # remove whitespace + +my $gt_parser = CXGN::Phylo::Parse_newick->new($gene_tree_newick); +my $gene_tree = $gt_parser->parse(); + +my $st_parser = CXGN::Phylo::Parse_newick->new($species_tree_newick); +my $species_tree = $st_parser->parse(); + +#********* test Orthologger->new returns Orthologger obj. ********** +my $Orthologger_obj = + CXGN::Phylo::Orthologger->new( { 'gene_tree' => $gene_tree, 'species_tree' => $species_tree, 'reroot_method' => 'mindl' } ); +ok( defined $Orthologger_obj, 'new() returned something.' ); +isa_ok( $Orthologger_obj, 'CXGN::Phylo::Orthologger' ); + +#********* test mindl rerooting of tree *************** +$Orthologger_obj->get_gene_tree()->show_newick_attribute('species'); +my $got_rerooted_tree = $Orthologger_obj->get_gene_tree()->generate_newick(); + +my $expected_rerooted_tree = +'((jgi_Selmo1_91292[species=Selaginella_moellendorffii]:0.00532,jgi_Selmo1_158231[species=Selaginella_moellendorffii]:0.00016)[speciation=0]:0.06862,((Bradi1g48740.1[species=Brachypodium_distachyon]:0.03551,(((Sb10g004030.1[species=Sorghum_bicolor]:0.0001,GRMZM2G140689_P02[species=Zea_mays]:0.0001)[speciation=1]:0.00015,(GRMZM2G016330_P01[species=Zea_mays]:0.0001,GRMZM2G016330_P03[species=Zea_mays]:0.0001)[speciation=0]:0.00536)[speciation=0]:0.02677,(LOC_Os06g06410.1[species=Oryza_sativa]:0.01662,LOC_Os06g06410.2[species=Oryza_sativa]:0.00015)[speciation=0]:0.00288)[speciation=1]:0.02333)[speciation=0]:0.08447,((Solyc03g121130.2.1[species=Solanum_lycopersicum]:0.03887,(((POPTR_0014s08830.1[species=Populus_trichocarpa]:0.0001,POPTR_0014s08830.2[species=Populus_trichocarpa]:0.0001)[speciation=0]:0.05550,(X_30131.m006857[species=Ricinus_communis]:0.01095,X_30131.m007044[species=Ricinus_communis]:0.00016)[speciation=0]:0.02176)[speciation=1]:0.04630,(evm.model.supercontig_184.11[species=Carica_papaya]:0.04198,(AT2G46230.1[species=Arabidopsis_thaliana]:0.01153,AT2G46230.2[species=Arabidopsis_thaliana]:0.07275)[speciation=0]:0.15150)[speciation=1]:0.02371)[speciation=1]:0.01878)[speciation=1]:0.01095,((GSVIVT01026915001[species=Vitis_vinifera]:0.0001,GSVIVT01027024001[species=Vitis_vinifera]:0.0001)[speciation=0]:0.07067,(((Glyma12g31740.1[species=Glycine_max]:0.0001,Glyma13g38690.1[species=Glycine_max]:0.0001)[speciation=0]:0.0001,(Glyma13g38690.2[species=Glycine_max]:0.0001,Glyma13g38690.3[species=Glycine_max]:0.0001)[speciation=0]:0.0001)[speciation=0]:0.01992,(IMGA_Medtr6g086290.1[species=Medicago_truncatula]:0.02191,IMGA_Medtr7g116720.1[species=Medicago_truncatula]:0.00015)[speciation=0]:0.06024)[speciation=1]:0.05419)[speciation=1]:0.01302)[speciation=0]:0.0167)[speciation=1]:0.06862)'; + +is( $got_rerooted_tree, $expected_rerooted_tree, "Check rerooted tree newick is as expected." ); + +#*********** test ortholog result string is as expected ********** +my $ortholog_result_string = $Orthologger_obj->ortholog_result_string(); +my $expected_ortholog_result_string = +'orthologs of jgi_Selmo1_91292: AT2G46230.1 AT2G46230.2 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 LOC_Os06g06410.1 LOC_Os06g06410.2 POPTR_0014s08830.1 POPTR_0014s08830.2 Sb10g004030.1 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 +orthologs of jgi_Selmo1_158231: AT2G46230.1 AT2G46230.2 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 LOC_Os06g06410.1 LOC_Os06g06410.2 POPTR_0014s08830.1 POPTR_0014s08830.2 Sb10g004030.1 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 +orthologs of Bradi1g48740.1: AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of Sb10g004030.1: GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of GRMZM2G140689_P02: Sb10g004030.1 LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of GRMZM2G016330_P01: LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of GRMZM2G016330_P03: LOC_Os06g06410.1 LOC_Os06g06410.2 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of LOC_Os06g06410.1: GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 Sb10g004030.1 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of LOC_Os06g06410.2: GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 Sb10g004030.1 AT2G46230.1 AT2G46230.2 GSVIVT01026915001 GSVIVT01027024001 Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 POPTR_0014s08830.1 POPTR_0014s08830.2 Solyc03g121130.2.1 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of Solyc03g121130.2.1: AT2G46230.1 AT2G46230.2 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 evm.model.supercontig_184.11 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of POPTR_0014s08830.1: X_30131.m006857 X_30131.m007044 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of POPTR_0014s08830.2: X_30131.m006857 X_30131.m007044 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of X_30131.m006857: POPTR_0014s08830.1 POPTR_0014s08830.2 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of X_30131.m007044: POPTR_0014s08830.1 POPTR_0014s08830.2 AT2G46230.1 AT2G46230.2 evm.model.supercontig_184.11 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of evm.model.supercontig_184.11: AT2G46230.1 AT2G46230.2 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of AT2G46230.1: evm.model.supercontig_184.11 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of AT2G46230.2: evm.model.supercontig_184.11 POPTR_0014s08830.1 POPTR_0014s08830.2 X_30131.m006857 X_30131.m007044 Solyc03g121130.2.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of GSVIVT01026915001: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of GSVIVT01027024001: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of Glyma12g31740.1: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of Glyma13g38690.1: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of Glyma13g38690.2: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of Glyma13g38690.3: IMGA_Medtr6g086290.1 IMGA_Medtr7g116720.1 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of IMGA_Medtr6g086290.1: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +orthologs of IMGA_Medtr7g116720.1: Glyma12g31740.1 Glyma13g38690.1 Glyma13g38690.2 Glyma13g38690.3 GSVIVT01026915001 GSVIVT01027024001 Bradi1g48740.1 GRMZM2G016330_P01 GRMZM2G016330_P03 GRMZM2G140689_P02 LOC_Os06g06410.1 LOC_Os06g06410.2 Sb10g004030.1 jgi_Selmo1_158231 jgi_Selmo1_91292 +Leaves not in species tree: '; + +my @ortholog_lines = split( "\n", $ortholog_result_string ); +my @expected_ortholog_lines = split( "\n", $expected_ortholog_result_string ); +my ( $nlines, $nxlines ) = ( scalar @ortholog_lines, scalar @expected_ortholog_lines ); +is( $nlines, $nxlines, "Check number of output lines agrees with expectation: $nlines, $nxlines." ); + +my @sorted_ortholog_lines = sort @ortholog_lines; +my @sorted_expected_ortholog_lines = sort @expected_ortholog_lines; +$ortholog_result_string = join( "\n", @sorted_ortholog_lines ); +$expected_ortholog_result_string = join( "\n", @sorted_expected_ortholog_lines ); + +#$ortholog_result_string =~ s/ +/ /g; +#$ortholog_result_string =~ s/ *\n */\n/g; +#$expected_ortholog_result_string =~ s/ +/ /g; +#$expected_ortholog_result_string =~ s/ *\n */\n/g; +is( $ortholog_result_string, $expected_ortholog_result_string, + 'Check ortholog result string agrees with expectation.' ); + +$gene_tree_newick = +'((foxtail_millet[species=Setaria_italica]:1,(sorghum[species=Sorghum_bicolor]:1,maize[species=Zea_mays_x]:1):1):1,(rice[species=Oryza_sativa]:1,brachypodium[species=Brachypodium_distachyon]:1):1)'; + +# the following is a misrooted tree +$gene_tree_newick = '( + ( + foxtail_millet_1[species=Setaria_italica]:1, + ( + rice_1[species=Oryza_sativa]:1, + brachypodium_1[species=Brachypodium_distachyon]:1 + ):1 + ):0.5, + ( + sorghum_1[species=Sorghum_bicolor]:1, + maize_1[species=Zea_mays_x]:1 + ):0.5 +)'; + +$gt_parser = CXGN::Phylo::Parse_newick->new($gene_tree_newick); +$gene_tree = $gt_parser->parse(); + +$st_parser = CXGN::Phylo::Parse_newick->new($species_tree_newick); +$species_tree = $st_parser->parse(); + +#********* test Orthologger->new returns Orthologger obj. ********** +$Orthologger_obj = + CXGN::Phylo::Orthologger->new( { 'gene_tree' => $gene_tree, 'species_tree' => $species_tree, 'reroot_method' => 'mindl' } ); +ok( defined $Orthologger_obj, 'new() returned something.' ); +isa_ok( $Orthologger_obj, 'CXGN::Phylo::Orthologger' ); + +#********* test mindl rerooting of tree *************** +$Orthologger_obj->get_gene_tree()->show_newick_attribute('species'); +$got_rerooted_tree = $Orthologger_obj->get_gene_tree()->generate_newick(); +$expected_rerooted_tree = +'((rice_1[species=Oryza_sativa]:1,brachypodium_1[species=Brachypodium_distachyon]:1)[speciation=1]:0.5,(foxtail_millet_1[species=Setaria_italica]:1,(sorghum_1[species=Sorghum_bicolor]:1,maize_1[species=Zea_mays_x]:1)[speciation=0]:1)[speciation=1]:0.5)'; +is( $got_rerooted_tree, $expected_rerooted_tree, "Check rerooted tree newick is as expected." ); +$ortholog_result_string = $Orthologger_obj->ortholog_result_string(); + +$expected_ortholog_result_string = 'orthologs of rice_1: brachypodium_1 foxtail_millet_1 sorghum_1 +orthologs of brachypodium_1: rice_1 foxtail_millet_1 sorghum_1 +orthologs of foxtail_millet_1: sorghum_1 brachypodium_1 rice_1 +orthologs of sorghum_1: foxtail_millet_1 brachypodium_1 rice_1 +Leaves not in species tree: maize_1'; + +@ortholog_lines = split( "\n", $ortholog_result_string ); +@expected_ortholog_lines = split( "\n", $expected_ortholog_result_string ); +( $nlines, $nxlines ) = ( scalar @ortholog_lines, scalar @expected_ortholog_lines ); +is( $nlines, $nxlines, "Check number of output lines agrees with expectation: $nlines, $nxlines." ); + +@sorted_ortholog_lines = sort @ortholog_lines; +@sorted_expected_ortholog_lines = sort @expected_ortholog_lines; +$ortholog_result_string = join( "\n", @sorted_ortholog_lines ); +$expected_ortholog_result_string = join( "\n", @sorted_expected_ortholog_lines ); + +is( $ortholog_result_string, $expected_ortholog_result_string, + 'Check ortholog result string agrees with expectation.' ); + diff --git a/t/CXGN/Phylo/phylo.t b/t/CXGN/Phylo/phylo.t index fff63de2..d0c6ebb4 100755 --- a/t/CXGN/Phylo/phylo.t +++ b/t/CXGN/Phylo/phylo.t @@ -1,7 +1,8 @@ #!/usr/bin/perl -use Test::Most qw/no_plan/; +use Test::Most tests => 52; # qw/no_plan/; use Modern::Perl; +# tests Parser, Tree, and Node modules. use CXGN::Phylo::Tree; use CXGN::Phylo::Node; @@ -12,14 +13,14 @@ use Carp; # expression to test the Phylo packages with # -my $expression = "(1:0.082376,(2:0.196674,((3:0.038209,6:0.354293):0.026742,5:0.094338):0.064142):0.067562,4:0.295612)"; - -my $parser = CXGN::Phylo::Parse_newick -> new($expression); +my $newick_expression = + "(1:0.082376,(2:0.196674,((3:0.038209,6:0.354293):0.026742,5:0.094338):0.064142):0.067562,4:0.295612)"; +my $parser = CXGN::Phylo::Parse_newick -> new($newick_expression); # test tokenizer # -my @tokens = $parser -> tokenize($expression); -print STDERR "\tTOKENS: ".join("|", @tokens)."\n"; +my @tokens = $parser -> tokenize($newick_expression); +# print STDERR "\tTOKENS: ".join("|", @tokens)."\n"; is (@tokens, 22, "Token count test"); my $tree = $parser-> parse(); @@ -45,7 +46,7 @@ my $n = $tree->get_node(5); #$tree->get_root()->rotate_node(); #$n4->set_hidden(1); -my $subtree_len = $tree->get_root()->calculate_subtree_distances(); +#my $subtree_len = $tree->get_root()->calculate_subtree_distances(); #is ($subtree_len, 12, "subtree length test"); # test the leaf functions in two different ways @@ -63,6 +64,7 @@ is ($tree->get_root()->is_leaf, 0, "root leaf test"); is (@leaflist, 6, "leaf list test"); + # test the root # my $root = $tree->get_root(); @@ -81,49 +83,59 @@ is ($inner_node_subnode_count, 6, "inner node subnode count test"); # set species information to test subtree_species_count stuff # -$tree->get_node(2)->set_species("coffee"); -$tree->get_node(4)->set_species("tomato"); -$tree->get_node(7)->set_species("potato"); -$tree->get_node(8)->set_species("pepper"); -$tree->get_node(9)->set_species("coffee"); -$tree->get_node(10)->set_species("tomato"); +my @species_list = ("coffee", "tomato", "potato", "pepper", "eggplant", "brachypodium"); +#my @node_list = values %{$tree->{node_hash}}; +my $i = 0; +foreach my $n (values %{$tree->{node_hash}}) { + next if(scalar $n->get_children() > 0); # skip non-leaves + $n->set_species($species_list[$i % 6]); +# print "i, species: $i ", $n->get_species(), "\n"; + $i++; +} # test the subtree_species count functions #$tree->get_root()->recursive_text_render(); -#exit; -# +#exit;# $tree->get_root()->recursive_set_leaf_species_count(); #$tree->get_root()->calculate_subtree_species_count(); # pick out a node and test the count # -is ($tree->get_node(5)->get_attribute("leaf_species_count"), 3, "subtree leaf species count test"); +#print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n"; +is($tree->get_root()->get_attribute("leaf_species_count"), 6, "subtree leaf species count test"); +#is ($tree->get_node(5)->get_attribute("leaf_species_count"), 3, "subtree leaf species count test"); # test the remove_child function # +#print STDERR "before tree copy\n"; my $rm_tree = $tree->copy(); -$n = $rm_tree->get_node(5); -my @children = $n->get_children(); -print STDERR "\tRemove child\nbefore: ".$n->to_string()."\n"; -is ($n->get_children, 2, "get_children test"); -print STDERR "\t(Removing child ".$children[0]->get_node_key().")\n"; -$n->remove_child($children[0]); -is ($n->get_children(), 1, "remove child test"); -print STDERR "\tafter : ".$n->to_string()."\n"; +#print STDERR 'after $tree->copy() \n'; +my @root_children = $rm_tree->get_root()->get_children(); +my $n1 = $root_children[1]; +my @children =$n1->get_children(); +#print STDERR "\tRemove child\nbefore: ".$n->to_string()."\n"; +is ($n1->get_children, 2, "get_children test"); +#print STDERR "\t(Removing child ".$children[0]->get_node_key().")\n"; +$n1->remove_child($children[0]); +is ($n1->get_children(), 1, "remove child test"); +#print STDERR "\tafter : ".$n1->to_string()."\n"; my @root_kids = $rm_tree->get_root()->get_children(); is (@root_kids, 3, "root children count test"); $rm_tree->get_root()->remove_child($root_kids[1]); -print STDERR "\tRemoving child key=".($root_kids[1]->get_name())."\n"; +#print STDERR "\tRemoving child key=".($root_kids[1]->get_name())."\n"; #foreach my $c ($rm_tree->get_root()->get_children()) { print "current children = ".$c->get_name()."\n"; } is ($rm_tree->get_root()->get_children(), 2, "removed one root child test"); # test reset_root # -my $nn = $tree->get_node(5); -$tree->reset_root($nn); +#print STDERR $tree->generate_newick(); + +$tree->reset_root( $tree->get_node(5) ); + +#print STDERR $tree->generate_newick(); # test the compare function # initialize two identical trees and compare @@ -159,41 +171,51 @@ is ($tree_a->compare_rooted($tree_b), 1, "tree topology specification test"); # my $new_tree = $tree->copy(); if ($tree->compare_rooted($new_tree)) { # should be the same, shouldn't it? - print STDERR "Compared tree to newtree and found them to be identical.\n"; + # print STDERR "Compared tree to newtree and found them to be identical.\n"; } else { print STDERR "newtree and tree are not identical. Oops.\n"; } is ($new_tree->compare_rooted($tree), 1, "copied tree identity check"); isnt ( $new_tree, $tree, "tree pointer non-identity check"); +my ($rfd, $symd, $d3) = $tree->RF_distance($new_tree); +is($rfd, 0, "check RF distance between tree and copy is 0."); +is($symd, 0, "check RF distance between tree and copy is 0."); + # check if I can remove a node in new_tree without affecting $tree # +#print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n"; $new_tree->delete_node(3); + + +# print $tree->generate_newick(), "\n"; +# print $new_tree->generate_newick(), "\n"; + is($new_tree->compare_rooted($tree), 0, "changed copied tree identity check"); # test the collapsing function - test a tree with many nodes that # have only one child. # -print STDERR "\tTesting CXGN::Phylo::Node::recursive_collapse_nodes\n"; +#print STDERR "\tTesting CXGN::Phylo::Node::recursive_collapse_nodes\n"; my $c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1)"))->parse(); $c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree)); -print STDERR "The original tree: \n"; -$c_tree->render(); -print STDERR "=====\n\n"; -# readline stdin; +#print STDERR "The original tree: \n"; +#$c_tree->render(); +#print STDERR "=====\n\n"; is ($c_tree->get_all_nodes(), 6, "node count before collapse"); $c_tree->collapse_tree(); -print STDERR "The collapsed tree:\n"; -$c_tree->render(); -print STDERR "=====\n\n"; +#print STDERR "The collapsed tree:\n"; +#$c_tree->render(); +#print STDERR "=====\n\n"; is ($c_tree->get_all_nodes(), 3, "node count after collapse"); -$new_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($new_tree)); -$new_tree->render(); +#$new_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($new_tree)); +#$new_tree->render(); + #if(1 || $c_tree->get_all_nodes() != 3){ #$c_tree->print_node_keys(); #$c_tree->get_root()->print_subtree(); @@ -203,23 +225,16 @@ $new_tree->render(); # $c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse(); $c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree)); -print STDERR "Original tree\n"; -$c_tree->render(); $c_tree->collapse_tree(); -print STDERR "Collapsed tree:\n"; -$c_tree->render(); -print STDERR "=====\n\n"; # test a tree collapsing with a tree that has branch lengths of zero. # my $z_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:0)C:0)D:0)E:1, (((G:1, F:1)H:0)I:1)J:1)"))->parse(); -print STDERR "Testing the recursive_collapse_zero_branches() function...\nOriginal tree:\n"; -$z_tree->get_root()->recursive_text_render(); +#print STDERR "Testing the recursive_collapse_zero_branches() function...\nOriginal tree:\n"; +#$z_tree->get_root()->print_subtree(); + my $z_tree_node_count = $z_tree->get_node_count(); $z_tree ->get_root()->recursive_collapse_zero_branches(); -print STDERR "New tree:\n"; -$z_tree->get_root()->recursive_text_render(); -print STDERR "====\n\n"; is ($z_tree->get_node_count(), $z_tree_node_count-4, "recursive_collapse_zero_nodes test"); @@ -230,7 +245,6 @@ print STDERR "\tDeleting internal node (key=4)...\n"; my $ind_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse(); my $ind_tree_copy = $ind_tree->copy(); $ind_tree->delete_node(4); -$ind_tree->render(); is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+1), "node count after delete test"); is ($ind_tree->get_node(4), undef, "has node really disappeared test"); @@ -238,7 +252,7 @@ is ($ind_tree->get_node(4), undef, "has node really disappeared test"); # print STDERR "\tDeleting a leaf node (key=2)...\n"; $ind_tree->delete_node(2); -$ind_tree->render(); +#$ind_tree->render(); is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+2), "node count after leaf node deletion"); # test the newick generation from the node @@ -292,13 +306,13 @@ isnt($binary_fail, 1, "Binary tree test: all children count <= 2"); # recover original tree. Check that rooted and unrooted compares both give 1. # $tree = CXGN::Phylo::Parse_newick->new("(A:1, (B:1, C:1):1)")->parse(); -$expression = "(A:0.082376,(B:0.196674,((C:0.038209,F:0.354293):0.026742,E:0.094338):0.064142):0.067562,D:0.295612)"; -#my $expression = "(A:1,(B:1,((C:2,F:4):1,E:1):2.02):1,D:2)"; -#my $expression = "((A:1, D:2):1, (B:1, C:2, E:3):2)"; -#my $expression = "((A:0.89, D:1.2):1.4, (B:1, C:1.1, E:0.9):1)"; -#my $expression = "(C:1, D:3, (A:5, B:2): 1)"; -#my $expression = "(A:3, ((B:1, C:2):1.5):1)"; -$tree = CXGN::Phylo::Parse_newick->new($expression)->parse(); +$newick_expression = "(A:0.082376,(B:0.196674,((C:0.038209,F:0.354293):0.026742,E:0.094338):0.064142):0.067562,D:0.295612)"; +#my $newick_expression = "(A:1,(B:1,((C:2,F:4):1,E:1):2.02):1,D:2)"; +#my $newick_expression = "((A:1, D:2):1, (B:1, C:2, E:3):2)"; +#my $newick_expression = "((A:0.89, D:1.2):1.4, (B:1, C:1.1, E:0.9):1)"; +#my $newick_expression = "(C:1, D:3, (A:5, B:2): 1)"; +#my $newick_expression = "(A:3, ((B:1, C:2):1.5):1)"; +$tree = CXGN::Phylo::Parse_newick->new($newick_expression)->parse(); ok($tree->test_tree(), "tree test 1"); $tree->get_root()->recursive_collapse_single_nodes(); ok($tree->test_tree(), "tree test 2"); @@ -312,7 +326,7 @@ $tree->reset_root_to_point_on_branch($tree->min_leaf_dist_variance_point()); $tree->get_root()->recursive_implicit_names(); -$tree->get_root()->print_subtree("\n"); +# $tree->get_root()->print_subtree("\n"); # readline stdin; #$tree->reset_root_min_max_root_leaf_distance(); @@ -321,7 +335,7 @@ $tree->get_root()->print_subtree("\n"); ##exit(); my $total_branch_length = subtree_branch_length($tree->get_root()); -$new_tree = $tree->copy(); + $new_tree = $tree->copy(); my ($new_root, $da) = $new_tree->min_leaf_dist_variance_point(); #exit; @@ -343,13 +357,12 @@ for (my $i = 0; $i < @node_list; $i++) { my @new_node_list = $new_tree->get_root()->recursive_subtree_node_list(); my $n = $new_node_list[$i]; - my $small = 0.0; - my $dab = ($small +(1.0 - $small)*rand())*$n->get_branch_length(); #random point on ith branch + my $small = 0.000001; + my $dab = ($small +(1.0 - 2*$small)*rand())*$n->get_branch_length(); #random point on ith branch $new_tree->reset_root_to_point_on_branch($n, $dab); $count_treetesta_ok += $new_tree->test_tree(); - $count_compare_rooted1 += $comp_rooted1 = $tree->compare_rooted($new_tree); # compare_rooted should be true only for $n a child of $new_tree's root. $count_compare_unrooted1 += $comp_unrooted1 = $tree->compare_unrooted($new_tree); # compare_unrooted should be true @@ -377,23 +390,158 @@ for (my $i = 0; $i < @node_list; $i++) { # exit; } - $blc = abs($total_branch_length - subtree_branch_length($new_tree->get_root())); + my $subtree_bl = subtree_branch_length($new_tree->get_root()); + $blc = abs($total_branch_length - $subtree_bl); if ($blc > $max_branch_length_change) { $max_branch_length_change = $blc; +# print STDERR "tbl, stbl: $total_branch_length, $subtree_bl \n"; } } -ok($max_branch_length_change < 5.0e-15*$total_branch_length, "Test that resetting root leaves total branch length unchanged. \n"); -print($count_compare_rooted1, " ", $count_compare_unrooted1, " ", $count_compare_rooted2, " ", $count_compare_unrooted2, "\n"); -is($count_treetesta_ok, @node_list, "tree_test ok on trees rooted at random points.\n"); -is($count_treetestb_ok, @node_list, "tree_test ok on trees rooted at min variance point.\n"); -is($count_compare_rooted1, scalar $tree->get_root()->get_children(), "tree reset_root and compare test 1\n"); -is($count_compare_unrooted1, @node_list, "tree reset_root and compare test 2\n"); -is($count_compare_rooted2, @node_list, "tree reset_root and compare test 3\n"); -is($count_compare_unrooted2, @node_list, "tree reset_root and compare test 4\n"); +ok($max_branch_length_change < 5.0e-15*$total_branch_length, "Test that resetting root leaves total branch length unchanged."); +# print($count_compare_rooted1, " ", $count_compare_unrooted1, " ", $count_compare_rooted2, " ", $count_compare_unrooted2, ".\n"); +is($count_treetesta_ok, @node_list, "tree_test ok on trees rooted at random points."); +is($count_treetestb_ok, @node_list, "tree_test ok on trees rooted at min variance point."); +is($count_compare_rooted1, scalar $tree->get_root()->get_children(), "tree reset_root and compare test 1."); +is($count_compare_unrooted1, @node_list, "tree reset_root and compare test 2."); +is($count_compare_rooted2, @node_list, "tree reset_root and compare test 3."); +is($count_compare_unrooted2, @node_list, "tree reset_root and compare test 4."); + +# Test pre- in- post- order traversals. +my $t_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1):1, C:1):1, D:1):1, E:1)"))->parse(); + +my $preorder_names_by_hand = "node: .\n" . "node: \n" . "node: \n" . "node: \n" . "node: A\n" . "node: B\n" + . "node: C\n" . "node: D\n" . "node: E\n"; +#print STDERR "$preorder_names_by_hand\n"; +$t_tree->{node_names} = undef; +$t_tree->preorder_traversal( \&traverse_test_function ); +my $preorder_names = $t_tree->{'node_names'}; +is($preorder_names, $preorder_names_by_hand, "preorder traversal test."); + + +my $inorder_names_by_hand = "node: A\n" . "node: \n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n" + . "node: D\n" . "node: .\n" . "node: E\n"; +#print STDERR "$inorder_names_by_hand\n"; +$t_tree->{node_names} = undef; +$t_tree->inorder_traversal( \&traverse_test_function ); +my $inorder_names = $t_tree->{'node_names'}; +is($inorder_names, $inorder_names_by_hand, "inorder traversal test."); + + +$t_tree->{node_names} = undef; +my $postorder_names_by_hand = "node: A\n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n" . "node: D\n" + . "node: \n" . "node: E\n" . "node: .\n"; +#print STDERR "$postorder_names_by_hand \n"; +$t_tree->postorder_traversal( \&traverse_test_function ); +# print STDERR "after. \n"; + #sub{ my $node = shift; my $str = "node: " . $node->get_name() . "\n"; print STDERR $node->get_name(), "\n"; return $str;} ); +my $postorder_names = $t_tree->{'node_names'}; +is($postorder_names, $postorder_names_by_hand, "postorder traversal test."); + + +# Test the species bit hash using a bigger tree +my $species_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella[species=Selaginella_moellendorffii]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( (wheat[species=Triticum_aestivum]:1, wheat_x[species=Triticum_aestivum_x]:1):1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)"; +$species_tree = CXGN::Phylo::Parse_newick -> new($species_tree_newick_expression)->parse(); + +my $gene_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella[species=Selaginella_moellendorffii]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)"; +my $gene_tree = CXGN::Phylo::Parse_newick -> new($gene_tree_newick_expression)->parse(); + +$gene_tree->show_newick_attribute('species'); +my $nwck = $gene_tree->generate_newick(); #print $nwck, "\n"; + +my $spec_bithash = $gene_tree->get_species_bithash($species_tree); +my $spec_bithash_got = ''; + foreach (sort keys %$spec_bithash){ + $spec_bithash_got .= $_ . " " . $spec_bithash->{$_} . " \n"; +} +#print $spec_bithash_got, "\n"; + +my $spec_bithash_expected = +"Amborella_trichopoda 1 \n" . +"Brachypodium_distachyon 2 \n" . +"Chlamydomonas_reinhardtii 4 \n" . +"Hordeum_vulgare 8 \n" . +"Oryza_sativa 16 \n" . +"Phoenix_dactylifera 32 \n" . +"Physcomitrella_patens 64 \n" . +"Pinus_taeda 128 \n" . +"Selaginella_moellendorffii 256 \n" . +"Setaria_italica 512 \n" . +"Sorghum_bicolor 1024 \n" . +"Triticum_aestivum 2048 \n" . +#"Triticum_aestivum_x 4096 \n" . +"Zea_mays 4096 \n"; + +is($spec_bithash_got, $spec_bithash_expected, "Species bithash test 1."); + + +$gene_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella_x[species=Selaginella_moellendorffii_x]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm[species=Phoenix_dactylifera]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium[species=Brachypodium_distachyon]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)"; +$gene_tree = CXGN::Phylo::Parse_newick -> new($gene_tree_newick_expression)->parse(); + +$gene_tree->show_newick_attribute('species'); +$nwck = $gene_tree->generate_newick(); #print $nwck, "\n"; + +$spec_bithash = $gene_tree->get_species_bithash($species_tree); +$spec_bithash_got = ''; + foreach (sort keys %$spec_bithash){ + $spec_bithash_got .= $_ . " " . $spec_bithash->{$_} . " \n"; +} +#print $spec_bithash_got, "\n"; + +$spec_bithash_expected = +"Amborella_trichopoda 1 \n" . +"Brachypodium_distachyon 2 \n" . +"Chlamydomonas_reinhardtii 4 \n" . +"Hordeum_vulgare 8 \n" . +"Oryza_sativa 16 \n" . +"Phoenix_dactylifera 32 \n" . +"Physcomitrella_patens 64 \n" . +"Pinus_taeda 128 \n" . +#"Selaginella_moellendorffii 256 \n" . +"Setaria_italica 256 \n" . +"Sorghum_bicolor 512 \n" . +"Triticum_aestivum 1024 \n" . +#"Triticum_aestivum_x 2048 \n" . +"Zea_mays 2048 \n"; + +is($spec_bithash_got, $spec_bithash_expected, "Species bithash test 2."); + + +$gene_tree_newick_expression = "( chlamydomonas[species=Chlamydomonas_reinhardtii]:1, ( physcomitrella[species=Physcomitrella_patens]:1, ( selaginella_x[species=Selaginella_moellendorffii_x]:1, ( loblolly_pine[species=Pinus_taeda]:1, ( amborella[species=Amborella_trichopoda]:1, ( date_palm_x[species=Phoenix_dactylifera_x]:1, ( ( foxtail_millet[species=Setaria_italica]:1, ( sorghum[species=Sorghum_bicolor]:1, maize[species=Zea_mays]:1 ):1 ):1, ( rice[species=Oryza_sativa]:1, ( brachypodium_x[species=Brachypodium_distachyon_x]:1, ( wheat[species=Triticum_aestivum]:1, barley[species=Hordeum_vulgare]:1 ):1 ):1 ):1 ):1):1):1):1):1):1)"; +$gene_tree = CXGN::Phylo::Parse_newick -> new($gene_tree_newick_expression)->parse(); + +$gene_tree->show_newick_attribute('species'); +$nwck = $gene_tree->generate_newick(); #print $nwck, "\n"; + +$spec_bithash = $gene_tree->get_species_bithash($species_tree); +$spec_bithash_got = ''; + foreach (sort keys %$spec_bithash){ + $spec_bithash_got .= $_ . " " . $spec_bithash->{$_} . " \n"; +} +#print $spec_bithash_got, "\n"; + +$spec_bithash_expected = +"Amborella_trichopoda 1 \n" . +#"Brachypodium_distachyon 2 \n" . +"Chlamydomonas_reinhardtii 2 \n" . +"Hordeum_vulgare 4 \n" . +"Oryza_sativa 8 \n" . +#"Phoenix_dactylifera 32 \n" . +"Physcomitrella_patens 16 \n" . +"Pinus_taeda 32 \n" . +#"Selaginella_moellendorffii 256 \n" . +"Setaria_italica 64 \n" . +"Sorghum_bicolor 128 \n" . +"Triticum_aestivum 256 \n" . +#"Triticum_aestivum_x 2048 \n" . +"Zea_mays 512 \n"; + +is($spec_bithash_got, $spec_bithash_expected, "Species bithash test 3."); + +exit; sub subtree_branch_length{ - my $self = shift; # node + my $self = shift; # node my @node_list = $self->recursive_subtree_node_list(); my $total_branch_length = 0.0; foreach my $n (@node_list) { @@ -403,3 +551,14 @@ sub subtree_branch_length{ } +sub traverse_test_function{ +my $node = shift; +my $tree = $node->get_tree(); +#my $new_node_names = (defined $tree->{'node_names'})? $tree->{'node_names'}: ''; +#$new_node_names .= "node: [" . $node->get_name() . "]\n"; print STDERR "node: [", $node->get_name(), "]\n"; +#$tree->{'node_names'} = $new_node_names; +$tree->{'node_names'} .= "node: " . $node->get_name() . "\n"; +} + + + diff --git a/t/CXGN/Phylo/phylo_test.t b/t/CXGN/Phylo/phylo_test.t deleted file mode 100755 index b270f376..00000000 --- a/t/CXGN/Phylo/phylo_test.t +++ /dev/null @@ -1,452 +0,0 @@ -#!/usr/bin/perl -use strict; -use Test::More qw/no_plan/; - -use CXGN::Phylo::Tree; -use CXGN::Phylo::Node; -use CXGN::Phylo::Parser; -use Data::Dumper; - -use Carp; - -# expression to test the Phylo packages with -# -my $newick_expression = - "(1:0.082376,(2:0.196674,((3:0.038209,6:0.354293):0.026742,5:0.094338):0.064142):0.067562,4:0.295612)"; -my $parser = CXGN::Phylo::Parse_newick -> new($newick_expression); - -# test tokenizer -# -my @tokens = $parser -> tokenize($newick_expression); -print STDERR "\tTOKENS: ".join("|", @tokens)."\n"; -is (@tokens, 22, "Token count test"); - -my $tree = $parser-> parse(); - -#print STDERR Dumper($tree); - -#print STDERR "Total Nodes: ".(keys(%{$tree->{node_hash}}))."\n"; -#is (keys(%{$tree->{node_hash}}), 10, "node count test"); - -# check the number of nodes returned by get_all_nodes -# -is ($tree->get_all_nodes(), 10, "node count test [node_hash]"); - -# pick an element and verify if it is a CXGN::Phylo::Node object -# -is ( UNIVERSAL::isa(($tree->get_all_nodes())[4], "CXGN::Phylo::Node"), 1, "node id test"); - -my $n = $tree->get_node(5); -#print STDERR "NODE 5: ".$n->get_name()."\n"; -#$n->set_hilited(1); -#print STDERR "Set node 5 to hilited ".$n->get_hilited()."\n"; -#$n->rotate_node(); -#$tree->get_root()->rotate_node(); -#$n4->set_hidden(1); - -#my $subtree_len = $tree->get_root()->calculate_subtree_distances(); -#is ($subtree_len, 12, "subtree length test"); - -# test the leaf functions in two different ways -# -is ($tree->get_leaf_count(), 6, "leaf count test"); - -my @leaflist = $tree->get_leaf_list(); -# foreach my $leaf (@leaflist) { -# print STDERR "Leaf: ".$leaf->get_name()."\n"; -# } - -is ($tree->get_root()->is_leaf, 0, "root leaf test"); - -# foreach my $l (@leaflist) { print STDERR "LEAFLIST: ". ($l->get_name())."\n"; } - -is (@leaflist, 6, "leaf list test"); - -#$tree->preorder_traversal(sub {my $node = shift; print STDERR "Preorder. node name: ", $node->get_name(), " ", $node->get_branch_length(), "\n";}); -#print STDERR "\n"; - -#$tree->postorder_traversal(sub {my $node = shift; print STDERR "Postorder. node name: ", $node->get_name(), " ", $node->get_branch_length(), "\n";}); - -#exit; - -# test the root -# -my $root = $tree->get_root(); -is ($root->is_root(), 1, "root test"); - -# test the subtree node count functions -# -#my @ortho_groups = $tree->get_orthologs(); -$tree->get_root()->calculate_subtree_node_count(); -my $root_subnode_count = $tree->get_root()->get_subtree_node_count(); -my $leaf_subnode_count = $tree->get_node(7)->get_subtree_node_count(); -my $inner_node_subnode_count = $tree->get_node(3)->get_subtree_node_count(); -is ($root_subnode_count, 9, "root subnode count test"); -is ($leaf_subnode_count, 0, "leaf subnode count test"); -is ($inner_node_subnode_count, 6, "inner node subnode count test"); - -# set species information to test subtree_species_count stuff -# -my @species_list = ("coffee", "tomato", "potato", "pepper", "eggplant", "brachypodium"); -my @node_list = values %{$tree->{node_hash}}; -my $i = 0; -foreach my $n (@node_list) { - next if(scalar $n->get_children() > 0); # skip non-leaves - $n->set_species($species_list[$i % 6]); - print "i, species: $i ", $n->get_species(), "\n"; - $i++; -} - -# test the subtree_species count functions - -#$tree->get_root()->recursive_text_render(); -#exit;# -$tree->get_root()->recursive_set_leaf_species_count(); -#$tree->get_root()->calculate_subtree_species_count(); - -# pick out a node and test the count -# -print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n"; -is($tree->get_root()->get_attribute("leaf_species_count"), 6, "subtree leaf species count test"); -#is ($tree->get_node(5)->get_attribute("leaf_species_count"), 3, "subtree leaf species count test"); - -# test the remove_child function -# -print STDERR 'before $tree->copy() ', "\n"; -my $rm_tree = $tree->copy(); -print STDERR 'after $tree->copy() \n'; -my @root_children = $rm_tree->get_root()->get_children(); -my $n = $root_children[1]; -my @children =$n->get_children(); -print STDERR "\tRemove child\nbefore: ".$n->to_string()."\n"; -is ($n->get_children, 2, "get_children test"); -print STDERR "\t(Removing child ".$children[0]->get_node_key().")\n"; -$n->remove_child($children[0]); -is ($n->get_children(), 1, "remove child test"); -print STDERR "\tafter : ".$n->to_string()."\n"; - -my @root_kids = $rm_tree->get_root()->get_children(); -is (@root_kids, 3, "root children count test"); -$rm_tree->get_root()->remove_child($root_kids[1]); -print STDERR "\tRemoving child key=".($root_kids[1]->get_name())."\n"; -#foreach my $c ($rm_tree->get_root()->get_children()) { print "current children = ".$c->get_name()."\n"; } -is ($rm_tree->get_root()->get_children(), 2, "removed one root child test"); - -# test reset_root -# - -#my $nn = $tree->get_node(5); -#$tree->reset_root($nn); - - - - -# test the compare function -# initialize two identical trees and compare -# (should return 1) -# -my $species_tree_newick = "((((( tomato_tomato:1, potato_potato:1):1, pepper_pepper:1 ):1, eggplant_eggplant):1, nicotiana_nicotiana:1):1, coffee_coffee:1)"; -my $species_tree_parser = CXGN::Phylo::Parse_newick->new($species_tree_newick); -my $species_tree = $species_tree_parser->parse(); - -my $species_tree_newick2 = "((((( tomato_tomato:5, potato_potato:1):1, pepper_pepper:1 ):1, eggplant_eggplant):1, nicotiana_nicotiana:1):1, coffee_coffee:1)"; -my $species_tree_parser2 = CXGN::Phylo::Parse_newick->new($species_tree_newick2); -my $species_tree2 = $species_tree_parser2->parse(); - -# compare the tree to itself -# -is ($species_tree->compare_rooted($species_tree), 1, "tree self comparison test (tree1-tree1)"); -is ($species_tree->compare_rooted($species_tree2), 1, "tree comparison test (tree1-tree2)"); -is ($species_tree2->compare_rooted($species_tree2), 1, "tree self comparison test (tree2-tree2)"); -is ($species_tree2->compare_rooted($species_tree), 1, "tree comparion test (tree2-tree1)"); - -# test that a different tree returns 0 -# -is($species_tree->compare_rooted($tree), 0, "tree inequality test"); - -# test that two topologically identical but specified differently match in the comparison -# -my $tree_a = CXGN::Phylo::Parse_newick->new("(A:1, B:1)")->parse(); -my $tree_b = CXGN::Phylo::Parse_newick->new("(B:1, A:1)")->parse(); -is ($tree_a->compare_rooted($tree_b), 1, "tree topology specification test"); - - -# test the copy function -# -my $new_tree = $tree->copy(); -if ($tree->compare_rooted($new_tree)) { # should be the same, shouldn't it? - print STDERR "Compared tree to newtree and found them to be identical.\n"; -} -else { print STDERR "newtree and tree are not identical. Oops.\n"; } -is ($new_tree->compare_rooted($tree), 1, "copied tree identity check"); -isnt ( $new_tree, $tree, "tree pointer non-identity check"); - -my ($rfd, $symd, $d3) = $tree->RF_distance($new_tree); -is($rfd, 0, "check RF distance between tree and copy is 0.\n"); -is($symd, 0, "check RF distance between tree and copy is 0.\n"); - -# check if I can remove a node in new_tree without affecting $tree -# -#print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n"; -$new_tree->delete_node(3); - - -# print $tree->generate_newick(), "\n"; -# print $new_tree->generate_newick(), "\n"; - -is($new_tree->compare_rooted($tree), 0, "changed copied tree identity check"); - -# test the collapsing function - test a tree with many nodes that -# have only one child. -# -print STDERR "\tTesting CXGN::Phylo::Node::recursive_collapse_nodes\n"; -my $c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1)"))->parse(); - -$c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree)); - -#print STDERR "The original tree: \n"; -#$c_tree->render(); -#print STDERR "=====\n\n"; - -is ($c_tree->get_all_nodes(), 6, "node count before collapse"); - -$c_tree->collapse_tree(); -#print STDERR "The collapsed tree:\n"; -#$c_tree->render(); -#print STDERR "=====\n\n"; - -is ($c_tree->get_all_nodes(), 3, "node count after collapse"); - -#$new_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($new_tree)); -#$new_tree->render(); - -#if(1 || $c_tree->get_all_nodes() != 3){ -#$c_tree->print_node_keys(); -#$c_tree->get_root()->print_subtree(); -#}exit; - -# test a more complex case for collapsing -# -$c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse(); -$c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree)); -$c_tree->collapse_tree(); - -# test a tree collapsing with a tree that has branch lengths of zero. -# -my $z_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:0)C:0)D:0)E:1, (((G:1, F:1)H:0)I:1)J:1)"))->parse(); -print STDERR "Testing the recursive_collapse_zero_branches() function...\nOriginal tree:\n"; -$z_tree->get_root()->print_subtree(); - -my $z_tree_node_count = $z_tree->get_node_count(); -$z_tree ->get_root()->recursive_collapse_zero_branches(); - -is ($z_tree->get_node_count(), $z_tree_node_count-4, "recursive_collapse_zero_nodes test"); - -# check the delete node function -# first, check if we can delete an internal node... -# -print STDERR "\tDeleting internal node (key=4)...\n"; -my $ind_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse(); -my $ind_tree_copy = $ind_tree->copy(); -$ind_tree->delete_node(4); -is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+1), "node count after delete test"); -is ($ind_tree->get_node(4), undef, "has node really disappeared test"); - -# let's delete a leaf node... -# -print STDERR "\tDeleting a leaf node (key=2)...\n"; -$ind_tree->delete_node(2); -#$ind_tree->render(); -is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+2), "node count after leaf node deletion"); - -# test the newick generation from the node -# -my $original_newick = "((((A:1,B:1)C:1)D:1)E:1,(((G:1,F:1)H:1)I:1)J:1)"; -my $t = (CXGN::Phylo::Parse_newick->new($original_newick))->parse(); -my $new = $t->get_root()->recursive_generate_newick(); -# print STDERR "Original: $original_newick\n"; -# print STDERR "Regenerated newick = $new\n"; -my $t2 = (CXGN::Phylo::Parse_newick->new($new))->parse(); -is($t->compare_rooted($t2), 1, "Newick regeneration from tree test"); - - -my $incorp_tree = $tree->copy(); -$incorp_tree->incorporate_nodes(CXGN::Phylo::Node->new()); -is($incorp_tree->get_all_nodes(), 11, "Incorporate Node Test"); - -my $b_tree = $tree->copy(); -is($b_tree->get_all_nodes(), 10, "Binary tree test: copy"); -$b_tree->make_binary(); -my @nodes = $b_tree->get_all_nodes(); -is(@nodes, 11, "Binary tree test: node count"); -my $binary_fail = 0; -foreach(@nodes){ - my @children = $_->get_children(); - $binary_fail = 1 if @children > 2; -} -isnt($binary_fail, 1, "Binary tree test: all children count <= 2"); - -# render the tree -# -# $tree->get_layout()->set_image_width(500); -# $tree->get_layout()->set_image_height(300); - -# my $PNG_tree_renderer = CXGN::Phylo::PNG_tree_renderer -> new($tree); -# $tree->get_layout()->set_left_margin(50); -# $tree->get_layout()->set_right_margin(40); -# $PNG_tree_renderer->render(); - -# my $renderer = CXGN::Phylo::Text_tree_renderer -> new($tree); -# $renderer->render(); - - -# test tree root resetting, and tree comparison -# -# Get tree from newick expression. Reset root so as to minimize maximum root-leaf distance. -# copy a tree, then, for each branch, reset the root to a point along the branch, -# compare to original tree in both rooted and unrooted senses, -# unrooted comparison should give 1, rooted 0, except for branches to orig. root. -# Then reset root again so as to minimize max distance to leaves from root, and should -# recover original tree. Check that rooted and unrooted compares both give 1. - -# $tree = CXGN::Phylo::Parse_newick->new("(A:1, (B:1, C:1):1)")->parse(); -my $newick_expression = "(A:0.082376,(B:0.196674,((C:0.038209,F:0.354293):0.026742,E:0.094338):0.064142):0.067562,D:0.295612)"; -#my $newick_expression = "(A:1,(B:1,((C:2,F:4):1,E:1):2.02):1,D:2)"; -#my $newick_expression = "((A:1, D:2):1, (B:1, C:2, E:3):2)"; -#my $newick_expression = "((A:0.89, D:1.2):1.4, (B:1, C:1.1, E:0.9):1)"; -#my $newick_expression = "(C:1, D:3, (A:5, B:2): 1)"; -#my $newick_expression = "(A:3, ((B:1, C:2):1.5):1)"; -$tree = CXGN::Phylo::Parse_newick->new($newick_expression)->parse(); -ok($tree->test_tree(), "tree test 1"); -$tree->get_root()->recursive_collapse_single_nodes(); -ok($tree->test_tree(), "tree test 2"); - - -#my ($mldv_node, $mldv_dist_above, $min_var) = $tree->min_leaf_dist_variance_point(); -#$tree->reset_root_to_point_on_branch($mldv_node, $mldv_dist_above); - -$tree->reset_root_to_point_on_branch($tree->min_leaf_dist_variance_point()); -# print("tree initially rerooted at min variance point, (i.e. before loop): \n"); - - -$tree->get_root()->recursive_implicit_names(); -# $tree->get_root()->print_subtree("\n"); -# readline stdin; -#$tree->reset_root_min_max_root_leaf_distance(); - -#my ($anode, $adist, $avar) = $tree->min_leaf_dist_variance_point(); -#print("opt node name, dist above, variance, stddev: ", $anode->get_name(), " ", $adist, " ", $avar, " ", sqrt($avar), "\n"); -##exit(); - -my $total_branch_length = subtree_branch_length($tree->get_root()); -my $new_tree = $tree->copy(); -my ($new_root, $da) = $new_tree->min_leaf_dist_variance_point(); -#exit; - -my $count_compare_rooted1 = 0; -my $count_compare_unrooted1 = 0; -my $count_compare_rooted2 = 0; -my $count_compare_unrooted2 = 0; -my $count_treetesta_ok = 0; -my $count_treetestb_ok = 0; -my @node_list = $tree->get_root()->recursive_subtree_node_list(); - -my $max_branch_length_change = -1.0; -my $blc; -my ($comp_rooted1, $comp_unrooted1, $comp_rooted2, $comp_unrooted2) = (-1, -1, -1, -1); - -srand(132456); -for (my $i = 0; $i < @node_list; $i++) { - my $new_tree = $tree->copy(); - - my @new_node_list = $new_tree->get_root()->recursive_subtree_node_list(); - my $n = $new_node_list[$i]; - my $small = 0.0; - my $dab = ($small +(1.0 - $small)*rand())*$n->get_branch_length(); #random point on ith branch - - $new_tree->reset_root_to_point_on_branch($n, $dab); -my $testres = $new_tree->test_tree(); -# print "testres: $testres \n"; - $count_treetesta_ok += $testres; -# print "count_treetesta_ok: $count_treetesta_ok \n"; - - $count_compare_rooted1 += $comp_rooted1 = $tree->compare_rooted($new_tree); # compare_rooted should be true only for $n a child of $new_tree's root. - $count_compare_unrooted1 += $comp_unrooted1 = $tree->compare_unrooted($new_tree); # compare_unrooted should be true - -#put in some RF distance tests here. - - $blc = abs($total_branch_length - subtree_branch_length($new_tree->get_root())); - if ($blc > $max_branch_length_change) { - $max_branch_length_change = $blc; - } - - # my ($new_root, $da, $var) = $new_tree->min_leaf_dist_variance_point(); - # $new_tree->reset_root_to_point_on_branch($new_root, $da); - - $new_tree->reset_root_to_point_on_branch($new_tree->min_leaf_dist_variance_point()); - $count_treetestb_ok += $new_tree->test_tree(); - - $count_compare_rooted2 += $comp_rooted2 = $tree->compare_rooted($new_tree); - $count_compare_unrooted2 += $comp_unrooted2 = $tree->compare_unrooted($new_tree); - - if (!$comp_rooted2 || !$comp_unrooted2) { - print("tree : \n"); - $tree->get_root()->print_subtree("\n"); - print("new_tree: \n"); - $new_tree->get_root()->print_subtree("\n"); - # exit; - } - - my $subtree_bl = subtree_branch_length($new_tree->get_root()); - $blc = abs($total_branch_length - $subtree_bl); - if ($blc > $max_branch_length_change) { - $max_branch_length_change = $blc; - print STDERR "tbl, stbl: $total_branch_length, $subtree_bl \n"; - } -} -ok($max_branch_length_change < 5.0e-15*$total_branch_length, "Test that resetting root leaves total branch length unchanged. \n"); -print($count_compare_rooted1, " ", $count_compare_unrooted1, " ", $count_compare_rooted2, " ", $count_compare_unrooted2, "\n"); -is($count_treetesta_ok, @node_list, "tree_test ok on trees rooted at random points.\n"); -is($count_treetestb_ok, @node_list, "tree_test ok on trees rooted at min variance point.\n"); -is($count_compare_rooted1, scalar $tree->get_root()->get_children(), "tree reset_root and compare test 1\n"); -is($count_compare_unrooted1, @node_list, "tree reset_root and compare test 2\n"); -is($count_compare_rooted2, @node_list, "tree reset_root and compare test 3\n"); -is($count_compare_unrooted2, @node_list, "tree reset_root and compare test 4\n"); - -# Test pre- in- post- order traversals. -my $t_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1):1, C:1):1, D:1):1, E:1)"))->parse(); -my $preorder_names_by_hand = "node: .\n" . "node: \n" . "node: \n" . "node: \n" . "node: A\n" . "node: B\n" - . "node: C\n" . "node: D\n" . "node: E\n"; -#our $preorder_names = ""; -my $preorder_names = $t_tree->preorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str;} ); -is($preorder_names, $preorder_names_by_hand, "preorder traversal test.\n"); -#print STDERR "preorder_names: \n", $preorder_names, "\n\n", $preorder_names_by_hand. "\n\n"; - -my $inorder_names_by_hand = "node: A\n" . "node: \n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n" - . "node: D\n" . "node: .\n" . "node: E\n"; -my $inorder_names = $t_tree->inorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str; } ); -is($inorder_names, $inorder_names_by_hand, "inorder traversal test.\n"); -#print STDERR "inorder_names: \n", $inorder_names, "\n\n", $inorder_names_by_hand. "\n\n"; - -my $postorder_names_by_hand = "node: A\n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n" . "node: D\n" - . "node: \n" . "node: E\n" . "node: .\n"; -my $postorder_names = $t_tree->postorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str; } ); -is($postorder_names, $postorder_names_by_hand, "postorder traversal test.\n"); -#print STDERR "postorder_names: \n", $postorder_names, "\n\n", $postorder_names_by_hand. "\n\n"; - - -sub subtree_branch_length{ - my $self = shift; # node - my @node_list = $self->recursive_subtree_node_list(); - my $total_branch_length = 0.0; - foreach my $n (@node_list) { - $total_branch_length += $n->get_branch_length(); - } - return $total_branch_length; -} - - -sub traverse_test_function{ - my $node = shift; - my $str = "node: " . $node->get_name() . "\n"; return $str; -}