From 2221f10bca39dfd6963ed4323a7a8f311b503350 Mon Sep 17 00:00:00 2001 From: Cecile Ane Date: Fri, 24 Feb 2023 15:24:07 -0600 Subject: [PATCH 1/4] more on the correlated model in manual; citations file --- CITATION.bib | 22 +++++++++++++++++ docs/make.jl | 4 +++ docs/src/index.md | 34 +++++++++++++++++--------- docs/src/lib/internal.md | 21 ++++++++++++++++ docs/src/lib/public.md | 18 ++++++++++++++ docs/src/man/correlated_inheritance.md | 30 ++++++++++++++++++----- src/simulatecoalescent_network.jl | 8 ++++-- src/utils.jl | 11 ++++++--- 8 files changed, 126 insertions(+), 22 deletions(-) create mode 100644 CITATION.bib create mode 100644 docs/src/lib/internal.md create mode 100644 docs/src/lib/public.md diff --git a/CITATION.bib b/CITATION.bib new file mode 100644 index 0000000..f3b9fa8 --- /dev/null +++ b/CITATION.bib @@ -0,0 +1,22 @@ +% reference for the package, and for the correlated inheritance model +@article{2023fogg_phylocoalsimulations, + author = {Fogg, John and Allman, Elizabeth S and An{\'e}, C{\'e}cile}, + title = {{PhyloCoalSimulations}: A simulator for network multispecies coalescent + models, including a new extension for the inheritance of gene flow}, + elocation-id = {2023.01.11.523690}, + year = {2023}, + doi = {10.1101/2023.01.11.523690}, + journal = {bioRxiv} +} + +% reference for the PhyloNetworks package, which this package depends on +@article{2017SolislemusBastideAne_PhyloNetworks, + author = {Sol{\'\i}s-Lemus, Claudia and Bastide, Paul and An{\'e}, C{\'e}cile}, + title = {PhyloNetworks: A Package for Phylogenetic Networks}, + journal = {Molecular Biology and Evolution}, + year = {2017}, + volume = {34}, + number = {12}, + pages = {3292-3298}, + doi = {10.1093/molbev/msx235}, +} diff --git a/docs/make.jl b/docs/make.jl index ef78af7..ba70f0f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -22,6 +22,10 @@ makedocs(; "correlated inheritance" => "man/correlated_inheritance.md", "more examples" => "man/more_examples.md", ], + "library" => [ + "public" => "lib/public.md", + "internals" => "lib/internal.md", + ] ], ) diff --git a/docs/src/index.md b/docs/src/index.md index cf4bddf..ad42927 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -10,6 +10,22 @@ simulate phylogenies under the coalescent. It depends on [PhyloNetworks](https://github.com/crsl4/PhyloNetworks.jl) for the phylogenetic data structures, and manipulation of phylogenies. +References: +please see [bibtex entries here](https://github.com/cecileane/PhyloCoalSimulations.jl/blob/main/CITATION.bib). + +- for this package and the network coalescent model with inheritance correlation:\ + [Fogg, Allman & Ané (2023)](https://doi.org/10.1101/2023.01.11.523690) + PhyloCoalSimulations: A simulator for network multispecies coalescent models, + including a new extension for the inheritance of gene flow. + +- for [PhyloNetworks](https://github.com/crsl4/PhyloNetworks.jl), + which this package depends on:\ + Solís-Lemus, Bastide & Ané (2017). + PhyloNetworks: a package for phylogenetic networks. + [Molecular Biology and Evolution](https://academic.oup.com/mbe/article/doi/10.1093/molbev/msx235/4103410/PhyloNetworks-a-package-for-phylogenetic-networks?guestAccessKey=230afceb-df28-4160-832d-aa7c73f86369) + 34(12):3292–3298. + [doi:10.1093/molbev/msx235](https://doi.org/10.1093/molbev/msx235) + For a tutorial, see the manual: ```@contents @@ -23,16 +39,12 @@ Pages = [ Depth = 3 ``` -References: -upcoming - -## functions - -Most functions are internal (not exported). +For help on individual functions, see the library: -```@index -``` - -```@autodocs -Modules = [PhyloCoalSimulations] +```@contents +Pages = [ + "lib/public.md", + "lib/internal.md", +] +Depth = 3 ``` diff --git a/docs/src/lib/internal.md b/docs/src/lib/internal.md new file mode 100644 index 0000000..cb48b69 --- /dev/null +++ b/docs/src/lib/internal.md @@ -0,0 +1,21 @@ +# internal documentation + +Documentation for `PhyloCoalSimulations`'s internal functions. +These functions are not exported and their access (API) should not be +considered stable. But they can still be used, like this for example: +`PhyloCoalSimulations.foo()` for a function named `foo()`. + + +## functions & types + +```@autodocs +Modules = [PhyloCoalSimulations] +Public = false +Order = [:type,:function] +``` + +## index + +```@index +Pages = ["internal.md"] +``` diff --git a/docs/src/lib/public.md b/docs/src/lib/public.md new file mode 100644 index 0000000..ec60580 --- /dev/null +++ b/docs/src/lib/public.md @@ -0,0 +1,18 @@ +# public documentation + +Documentation for `PhyloCoalSimulations`'s public (exported) functions. +Most functions are internal (not exported). + +## functions & types + +```@autodocs +Modules = [PhyloCoalSimulations] +Private = false +Order = [:function,:type] +``` + +## index + +```@index +Pages = ["public.md"] +``` diff --git a/docs/src/man/correlated_inheritance.md b/docs/src/man/correlated_inheritance.md index a0f521b..e488eef 100644 --- a/docs/src/man/correlated_inheritance.md +++ b/docs/src/man/correlated_inheritance.md @@ -23,16 +23,34 @@ of 2 lineages of the same locus at a hybrid node, exactly 1 of them comes from one parent, and the other comes from the other parent. This model would have negative correlation between lineages. -By default, `simulatecoalescent` uses the traditional model, with independent -lineages. +By default, [`simulatecoalescent`](@ref) uses the traditional model, +with independent lineages. -It also has an option to simulate lineage inheritance with positive correlation. +It also has an option to simulate lineage inheritance with positive correlation, +under a coalescent model described in +[Fogg, Allman & Ané (2023)](https://doi.org/10.1101/2023.01.11.523690). For this, lineages' parents are drawn according to a Dirichlet process with base distribution determined by the γ values, and with concentration parameter α = (1-r)/r, that is, r = 1/(1+α), where r is the input inheritance correlation. -For example, if this correlation is set to 1, then α = 0 and all lineages inherit -from the same (randomly sampled) parent. The independence model corresponds -to r=0 and α infinite. +More specifically, consider 2 individuals (alleles) at a given locus, +that have not coalesced yet and are present at a give hybrid node. +According to the coalescent model with correlated inheritance, +the second individual is inherited from the same parent as the first individual +with probability r. And with probability 1-r, the second individual is +inherited from any parent (including the parent chosen by the first individual) +based on their γ values. + +This model can be the result of different loci evolving according to different γ +inheritance values. For example, loci that are under selection in the environment +where gene flow occurred may be more likely to be passed through gene flow, +whereas loci that are involved in reproduction barriers might be less likely +to be passed through gene flow. This would result in different sets of γ values +across different loci, and correlated inheritance overall. + +At one extreme, with correlation r=1 we have α = 0 and all lineages inherit +from the same (randomly sampled) parent. This is the *common inheritance* model. +The *independence model* corresponds to the other extreme r=0 and α infinite. + The same correlation r (or concentration α) parameter is used at all hybrid nodes, but the Dirichlet process is applied independently across hybrid nodes. diff --git a/src/simulatecoalescent_network.jl b/src/simulatecoalescent_network.jl index b5df6a3..d769886 100644 --- a/src/simulatecoalescent_network.jl +++ b/src/simulatecoalescent_network.jl @@ -7,7 +7,8 @@ can be used as a unique identifier of the edge above the network's root. get_rootedgenumber(net) = max(0, maximum(e.number for e in net.edge)) + 1 """ - simulatecoalescent(net, nloci, nindividuals; nodemapping=false, inheritancecorrelation=0.0) + simulatecoalescent(net, nloci, nindividuals; + nodemapping=false, inheritancecorrelation=0.0) Simulate `nloci` gene trees with `nindividuals` from each species under the multispecies network coalescent, along network `net` @@ -54,6 +55,8 @@ the same (randomly sampled) parent. More generally, the lineages' parents are simulated according to a Dirichlet process with base distribution determined by the γ values, and with concentration parameter α = (1-r)/r, that is, r = 1/(1+α), where `r` is the input inheritance correlation. +For more details about this model, please read the package manual or refer +to [Fogg, Allman & Ané (2023)](https://doi.org/10.1101/2023.01.11.523690). Assumptions: - `net` must have non-missing edge lengths and γ values. @@ -269,7 +272,8 @@ end """ simulatecoalescent(net, nloci, nindividuals, populationsize; - nodemapping=false, round_generationnumber=true, inheritancecorrelation=0.0) + nodemapping=false, round_generationnumber=true, + inheritancecorrelation=0.0) Simulate `nloci` gene trees with `nindividuals` from each species under the multispecies network coalescent, along network `net`, diff --git a/src/utils.jl b/src/utils.jl index e3c2a1c..f0bf7f9 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -3,7 +3,9 @@ Boolean: `true` if `node` has a single child edge, based on the edge's `isChild1` attribute. -This function will soon be moved to PhyloNetworks. +This function is now in [PhyloNetworks](https://crsl4.github.io/PhyloNetworks.jl/dev/lib/public/#PhyloNetworks.hassinglechild) +and will be removed from this package once a new version +of PhyloNetworks is registered. """ hassinglechild(node::PN.Node) = sum(e -> PN.getParent(e) === node, node.edge) == 1 @@ -11,7 +13,10 @@ hassinglechild(node::PN.Node) = sum(e -> PN.getParent(e) === node, node.edge) == singlechildedge(node) Child edge of `node`. Checks that it's a single child. -This function will soon be moved to PhyloNetworks. +This function is now implemented in PhyloNetworks as +[`getchildedge`](https://crsl4.github.io/PhyloNetworks.jl/dev/lib/public/#PhyloNetworks.getchild) +and will be removed from this package once a new version +of PhyloNetworks is registered. """ function singlechildedge(node::PN.Node) ce_ind = findall(e -> PN.getParent(e) === node, node.edge) @@ -40,7 +45,7 @@ ismappingnode(n::PN.Node) = length(n.edge) == 2 && hassinglechild(n) && n.name ! """ mappingnodes(gene tree) -Type to define an iterator over degree-2 mapping node in a gene tree, assuming these +Type to define an iterator over degree-2 mapping nodes in a gene tree, assuming these degree-2 nodes (other than the root) have a name to map them to nodes in a species phylogeny. See [`ismappingnode`](@ref PhyloCoalSimulations.mappingnodes). """ From 945f644c28aba5ed58cc76de8c666cfe35b9c0e9 Mon Sep 17 00:00:00 2001 From: Cecile Ane Date: Mon, 3 Apr 2023 13:49:20 -0500 Subject: [PATCH 2/4] rm hassinglechild and singlechildedge --- docs/src/man/more_examples.md | 2 +- src/simulatecoalescent_network.jl | 2 +- src/simulatecoalescent_onepop.jl | 2 +- src/utils.jl | 28 +--------------------------- test/test_multispeciesnetwork.jl | 2 +- 5 files changed, 5 insertions(+), 31 deletions(-) diff --git a/docs/src/man/more_examples.md b/docs/src/man/more_examples.md index 61ebcaf..5549010 100644 --- a/docs/src/man/more_examples.md +++ b/docs/src/man/more_examples.md @@ -41,7 +41,7 @@ to extract the mapping information. edge_count = Dict(e.number => 0 for e in net.edge) const PCS = PhyloCoalSimulations; # for lazy typing! for n in PCS.mappingnodes(tree) # iterate over degree-2 mapping nodes in the gene tree - child = PCS.singlechildedge(n) + child = getchildedge(n) popid = PCS.population_mappedto(child) # number of species edge that 'n' came from # sanity check below isnothing(popid) && error("""population ID not found for the child edge of diff --git a/src/simulatecoalescent_network.jl b/src/simulatecoalescent_network.jl index d769886..58957fd 100644 --- a/src/simulatecoalescent_network.jl +++ b/src/simulatecoalescent_network.jl @@ -176,7 +176,7 @@ function simulatecoalescent(net::PN.HybridNetwork, nloci::Integer, nindividuals; parentedgelist = PN.Edge[] childedgelist = PN.Edge[] for e in nn.edge - PN.getChild(e) === nn ? push!(parentedgelist, e) : push!(childedgelist, e) + PN.getchild(e) === nn ? push!(parentedgelist, e) : push!(childedgelist, e) end push!(parentedges, parentedgelist) push!(childedges, childedgelist) diff --git a/src/simulatecoalescent_onepop.jl b/src/simulatecoalescent_onepop.jl index 8948160..18a6858 100644 --- a/src/simulatecoalescent_onepop.jl +++ b/src/simulatecoalescent_onepop.jl @@ -273,7 +273,7 @@ function convert2tree!(rootnode::PN.Node) end function collect_edges_nodes!(net, parentedge) push!(net.edge, parentedge) - nn = PN.getChild(parentedge) + nn = PN.getchild(parentedge) push!(net.node, nn) for childedge in nn.edge childedge !== parentedge || continue # skip parentedge diff --git a/src/utils.jl b/src/utils.jl index f0bf7f9..33f11ee 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,29 +1,3 @@ -""" - hassinglechild(node) - -Boolean: `true` if `node` has a single child edge, -based on the edge's `isChild1` attribute. -This function is now in [PhyloNetworks](https://crsl4.github.io/PhyloNetworks.jl/dev/lib/public/#PhyloNetworks.hassinglechild) -and will be removed from this package once a new version -of PhyloNetworks is registered. -""" -hassinglechild(node::PN.Node) = sum(e -> PN.getParent(e) === node, node.edge) == 1 - -""" - singlechildedge(node) - -Child edge of `node`. Checks that it's a single child. -This function is now implemented in PhyloNetworks as -[`getchildedge`](https://crsl4.github.io/PhyloNetworks.jl/dev/lib/public/#PhyloNetworks.getchild) -and will be removed from this package once a new version -of PhyloNetworks is registered. -""" -function singlechildedge(node::PN.Node) - ce_ind = findall(e -> PN.getParent(e) === node, node.edge) - length(ce_ind) == 1 || error("node number $(node.number) has $(length(ce_ind)) children instead of 1 child") - return node.edge[ce_ind[1]] -end - """ population_mappedto(edge or node) @@ -40,7 +14,7 @@ population_mappedto(e::Union{PN.Edge,PN.Node}) = (e.inCycle == -1 ? nothing : e. Boolean: true if `node` is of degree 2, has a single child, and has a name. (The root is of degree-2 but is not a mapping node). """ -ismappingnode(n::PN.Node) = length(n.edge) == 2 && hassinglechild(n) && n.name != "" +ismappingnode(n::PN.Node) = length(n.edge) == 2 && PN.hassinglechild(n) && n.name != "" """ mappingnodes(gene tree) diff --git a/test/test_multispeciesnetwork.jl b/test/test_multispeciesnetwork.jl index 615cb4a..42a8b1b 100644 --- a/test/test_multispeciesnetwork.jl +++ b/test/test_multispeciesnetwork.jl @@ -14,7 +14,7 @@ genetree = simulatecoalescent(net, 1, Dict("A"=>2, "B"=>1)) genetree = simulatecoalescent(net, 1, 3; nodemapping=true)[1] tmp = map(n -> n.name, PCS.mappingnodes(genetree)) @test !isempty(tmp) && all(tmp .== "minus2") -@test Set(unique(PCS.population_mappedto.(PCS.singlechildedge.(PCS.mappingnodes(genetree))))) == Set((1,2)) +@test Set(unique(PCS.population_mappedto.(PN.getchildedge.(PCS.mappingnodes(genetree))))) == Set((1,2)) @test Set(unique(PCS.population_mappedto.(genetree.node))) == Set((1,2,3,nothing)) @test Set(unique(PCS.population_mappedto.(genetree.edge))) == Set((1,2,3)) From 86e173e7d2ab3d010265e2e8a43277d720e263c5 Mon Sep 17 00:00:00 2001 From: Cecile Ane Date: Mon, 3 Apr 2023 13:49:59 -0500 Subject: [PATCH 3/4] v0.1.2, requires PN v0.16 --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 001c27c..9149391 100644 --- a/Project.toml +++ b/Project.toml @@ -2,7 +2,7 @@ name = "PhyloCoalSimulations" uuid = "53f7b83a-06b5-4910-9a21-1896bf762ade" authors = ["Cecile Ane ", "John Fogg <70609062+fogg-uw@users.noreply.github.com>"] -version = "0.1.1" +version = "0.1.2" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -10,5 +10,5 @@ PhyloNetworks = "33ad39ac-ed31-50eb-9b15-43d0656eaa72" [compat] Distributions = "0.25" -PhyloNetworks = "0.15.3" +PhyloNetworks = "0.16" julia = "1.5" From 1f6b201d0cc393649d3a7e2619892b89422ee9c4 Mon Sep 17 00:00:00 2001 From: Cecile Ane Date: Mon, 3 Apr 2023 14:51:44 -0500 Subject: [PATCH 4/4] tiny text --- docs/src/man/correlated_inheritance.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/src/man/correlated_inheritance.md b/docs/src/man/correlated_inheritance.md index e488eef..8ea0987 100644 --- a/docs/src/man/correlated_inheritance.md +++ b/docs/src/man/correlated_inheritance.md @@ -16,7 +16,8 @@ Another model is that the lineages at a hybrid node are *all* inherited from the same parent, still choosing a (common) parent according to the γ inheritance probabilities. This model has full (and positive) correlation between lineages, and was used by -[Gerard, Gibbs & Kubatko (2011)](https://doi.org/10.1186/1471-2148-11-291). +[Gerard, Gibbs & Kubatko (2011)](https://doi.org/10.1186/1471-2148-11-291) +for example. The other extreme might be interesting for modelling allopolyploid events: of 2 lineages of the same locus at a hybrid node, exactly 1 of them comes