diff --git a/docs/src/man/converting_coal2generation_units.md b/docs/src/man/converting_coal2generation_units.md index 5eb8247..cad6bf7 100644 --- a/docs/src/man/converting_coal2generation_units.md +++ b/docs/src/man/converting_coal2generation_units.md @@ -42,7 +42,7 @@ Ne writenewick(tree, round=true) # lengths in coalescent units: before unit conversion # convert edge lengths in gene tree from coalescent units to # generations for e in tree.edge - e.length = round(e.length * Ne[e.inte1]) # round: to get integers + e.length = round(e.length * Ne[population_mappedto(e)]) # round: to get integers end writenewick(tree, round=true) # lengths in # of generations ``` @@ -112,7 +112,7 @@ convenience wrapper function that takes **Nₑ as an extra input** to: ```@repl converting genetree_gen = simulatecoalescent(net_gen,3,1, Ne; nodemapping=true); -writeMultiTopology(genetree_gen, stdout) # 3 gene trees, lengths in #generations +writemultinewick(genetree_gen, stdout) # 3 gene trees, lengths in #generations ``` !!! warning diff --git a/docs/src/man/correlated_inheritance.md b/docs/src/man/correlated_inheritance.md index 08847ae..9e5dd33 100644 --- a/docs/src/man/correlated_inheritance.md +++ b/docs/src/man/correlated_inheritance.md @@ -83,7 +83,7 @@ gt3 = simulatecoalescent(net, 1, 6; inheritancecorrelation=0.99, nodemapping=tru using DataFrames speciespath(phy) = DataFrame( number = [e.number for e in phy.edge], - label = [e.inte1 for e in phy.edge] + label = [population_mappedto(e) for e in phy.edge] ) el1 = speciespath(gt1); el2 = speciespath(gt2); el3 = speciespath(gt3); diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md index afae719..53ddefc 100644 --- a/docs/src/man/getting_started.md +++ b/docs/src/man/getting_started.md @@ -84,8 +84,8 @@ We can work with these gene trees within Julia with downstream code, and/or we can save them to a file: ```@repl getting_started -writeMultiTopology(trees, stdout) # write them to standout output (screen here) -writeMultiTopology(trees, "genetrees.phy") # warning: will overwrite "genetrees.phy" if this file existed +writemultinewick(trees, stdout) # write them to standout output (screen here) +writemultinewick(trees, "genetrees.phy") # warning: will overwrite "genetrees.phy" if this file existed rm("genetrees.phy") # hide ``` @@ -105,7 +105,7 @@ for i in 1:2 gt = trees[i] plot(gt, tipoffset=0.1, edgelabel=DataFrame(number = [e.number for e in gt.edge], - label = [e.inte1 for e in gt.edge])); + label = [population_mappedto(e) for e in gt.edge])); R"mtext"("gene $i", line=-1) # hide end R"mtext"("numbers: network edge each gene lineage maps to, at time of coalescence.\n8 = number of edge above the network root", side=1, line=-1, outer=true); # hide @@ -135,5 +135,5 @@ We can set 0 individuals within a species to simulate missing data. ```@repl getting_started genetrees = simulatecoalescent(net, 3, Dict("A"=>2, "B"=>1, "C"=>0)); -writeMultiTopology(genetrees, stdout) +writemultinewick(genetrees, stdout) ``` diff --git a/docs/src/man/mapping_genetree_to_network.md b/docs/src/man/mapping_genetree_to_network.md index 079e875..81fbedd 100644 --- a/docs/src/man/mapping_genetree_to_network.md +++ b/docs/src/man/mapping_genetree_to_network.md @@ -55,10 +55,10 @@ plot(net, showedgenumber=true, shownodelabel=true, tipoffset=0.1); R"mtext"("species network", side=3, line=-1); # hide R"mtext"("grey: population edge number", side=1, line=-1, cex=0.9); # hide plot(tree, edgelabel=DataFrame(number=[e.number for e in tree.edge], - label=[e.inte1 for e in tree.edge]), + label=[population_mappedto(e) for e in tree.edge]), edgelabelcolor="red4", shownodelabel=true, tipoffset=0.1); R"mtext"("gene tree", side=3, line=-1); # hide -R"mtext"("red (edge inte1 value): population a gene edge maps into", side=1, line=-2, cex=0.9); # hide +R"mtext"("red: population a gene edge maps into (grey in net)", side=1, line=-2, cex=0.9); # hide R"mtext"("black (node names): speciation/reticulation a node maps to", side=1, line=-1, cex=0.9); # hide R"dev.off()" # hide nothing # hide @@ -84,9 +84,9 @@ When the `nodemapping` argument is used, the mapping of a gene tree within a spe is fully contained within the newick string of the gene tree. This mapping is encoded with the degree-2 node names as mentioned above. Gene trees from `simulatecoalescent` also store the mapping for species edges -in the `inte1` field of gene tree edges. This information is 'lost' when a gene -tree is written in newick format. However, we can re-encode this information -with `gene_edgemapping!`. +in the `inte1` internal attribute of gene tree edges. +This information is 'lost' when a gene tree is written in newick format. +However, we can re-encode this information with [`gene_edgemapping!`](@ref). We can see an example of re-encoding using the gene tree and network from above. Let's focus, for example, on the edge that the hybrid lineage inherited from, whose child node has name "H1" @@ -101,8 +101,8 @@ Next we write our gene tree and read it back from the newick string, see that the mapping of edges was "lost", and how to recover it using [`gene_edgemapping!`](@ref): ```@repl mapping -tree_newick = writeTopology(tree) -tree = readTopology(tree_newick); # read the tree back from the newick string +tree_newick = writenewick(tree) +tree = readnewick(tree_newick); # read the tree back from the newick string # next: find the edge above H1, as before e_index = findfirst(e -> getchild(e).name == "H1", tree.edge) # may have changed e = tree.edge[e_index] diff --git a/docs/src/man/more_examples.md b/docs/src/man/more_examples.md index 9afb134..bc802e9 100644 --- a/docs/src/man/more_examples.md +++ b/docs/src/man/more_examples.md @@ -85,19 +85,23 @@ but it's best to access it via the function [`population_mappedto`](@ref) From the plot above, the minor "gene flow" edge is edge number 5 and the major hybrid edge has number 3. So we can count the gene lineages inherited via gene flow -as the number of gene tree edges with `inte1` equal to 5. +as the number of gene tree edges with `population_mappedto(edge)` equal to 5. If the gene trees have been saved to a file and later read from this file, -then the `.inte1` attributes are no longer stored in memory. In this case, -we can retrieve the mapping information by the internal node names. -The edges going through gene flow are those whose child node is named "H1" -and parent node is named "i2". - -We use the first option with the `.inte1` attribute below. +then they no longer have the internal attributes that store the population +each edge arose from. +But we can retrieve this mapping information from the internal node names. +For example, the edges going through gene flow are those whose child node is +named "H1" and parent node is named "i2". +To recover the internal edge attributes, use [`gene_edgemapping!`](@ref) as +described in section +"[mapping gene tree edges back into species edges](@ref)". + +We assume below that the internal edge attribute are up-to-date (as when gene +trees were simulated just before). We get that our one simulated gene tree was indeed inherited via gene flow: ```@repl downstreamexamples -sum(e.inte1 == 5 for e in tree.edge) # or: sum(population_mappedto(e) == 5 for e in tree.edge) ``` @@ -157,7 +161,7 @@ Finally, we multiply the length of each gene lineage by the rate of the species edge it maps into: ```@repl downstreamexamples for e in tree.edge - e.length *= networkedge_rate[e.inte1] + e.length *= networkedge_rate[population_mappedto(e)] end writenewick(tree, round=true, digits=4) # after rate variation ``` diff --git a/src/simulatecoalescent_network.jl b/src/simulatecoalescent_network.jl index c65814d..89a13c4 100644 --- a/src/simulatecoalescent_network.jl +++ b/src/simulatecoalescent_network.jl @@ -34,7 +34,8 @@ is carried by the `.name` attribute. Namely: that occurred along a population *edge* in `net`. Its `.intn1` attribute is set to the number of that network population edge. Its parent edge has its `.inte1` attribute also set to the number of - the population edge that it originated from. + the population edge that it originated from. This internal coding could in + future version: retrieve it with [`population_mappedto`](@ref). - The gene tree's root node (of degree 2) represents a coalescent event along the network's root edge. Its `.intn1` attribute is the number assigned to the network's root edge, @@ -343,7 +344,7 @@ julia> using Random; Random.seed!(54321); # for replicability of example below julia> genetrees = simulatecoalescent(net, 2, 1, Ne); -julia> writeMultiTopology(genetrees, stdout) # branch lengths: number of generations +julia> writemultinewick(genetrees, stdout) # branch lengths: number of generations (B:546.0,A:546.0); (B:3155.0,A:3155.0); @@ -382,7 +383,7 @@ function simulatecoalescent( # convert lengths to #generations in gene trees for gt in genetreelist for e in gt.edge - len = e.length * Neff[e.inte1] + len = e.length * Neff[population_mappedto(e)] e.length = (round_generationnumber ? round(len) : len ) end nodemapping || PN.removedegree2nodes!(gt, true) # 'true' option requires PN v0.15.1 diff --git a/test/test_multispeciesnetwork.jl b/test/test_multispeciesnetwork.jl index 452dc84..2cc1569 100644 --- a/test/test_multispeciesnetwork.jl +++ b/test/test_multispeciesnetwork.jl @@ -42,7 +42,7 @@ end function checknodeattributes(genetree) nd2 = 0 # number of degree-2 nodes in gene tree for node in genetree.node - isroot = node === genetree.node[genetree.rooti] + isroot = node === getroot(genetree) degree = length(node.edge) @test degree in [1,2,3] if degree == 2 nd2 += 1; end