diff --git a/Project.toml b/Project.toml index bf54feb4..2cfb3c8e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PhyloNetworks" uuid = "33ad39ac-ed31-50eb-9b15-43d0656eaa72" license = "MIT" -version = "0.15.3" +version = "0.16.0" [deps] BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" diff --git a/README.md b/README.md index cbca0168..95d56c20 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,8 @@ with or without transgressive evolution after reticulations: Systematic Biology, 67(5):800–820. [doi:10.1093/sysbio/syy033](https://doi.org/10.1093/sysbio/syy033). SI on [dryad](http://dx.doi.org/10.5061/dryad.nt2g6) - including a [tutorial for trait evolution](https://datadryad.org/bitstream/handle/10255/dryad.177752/xiphophorus_PCM_analysis.html?sequence=1) - and a [tutorial for network calibration](https://datadryad.org/bitstream/handle/10255/dryad.177754/xiphophorus_networks_calibration.html?sequence=1). + including a tutorial for trait evolution + and a tutorial for network calibration. Continuous traits, accounting for within-species variation: diff --git a/src/addHybrid.jl b/src/addHybrid.jl index e1a0b02f..c7c61c96 100644 --- a/src/addHybrid.jl +++ b/src/addHybrid.jl @@ -103,13 +103,13 @@ function addhybridedge!(net::HybridNetwork, nohybridladder::Bool, no3cycle::Bool end hybridpartnernew = (fixroot ? true : rand() > 0.2) # if true: partner hybrid = new edge above edge 2 ## check that the new network will be a DAG: no directional conflict - if directionalconflict(net, p1, edge2, hybridpartnernew) + if directionalconflict(p1, edge2, hybridpartnernew) if fixroot # don't try to change the direction of edge2 push!(blacklist, (e1,e2)) continue end # else: try harder: change direction of edge2 and move root hybridpartnernew = !hybridpartnernew # try again with opposite - if directionalconflict(net, p1, edge2, hybridpartnernew) + if directionalconflict(p1, edge2, hybridpartnernew) push!(blacklist, (e1,e2)) continue end # else: switching hybridpartnernew worked @@ -231,8 +231,7 @@ function hybrid3cycle(edge1::Edge, edge2::Edge) end """ - directionalconflict(net::HybridNetwork, parent::Node, edge::Edge, - hybridpartnernew::Bool) + directionalconflict(parent::Node, edge::Edge, hybridpartnernew::Bool) Check if creating a hybrid edge down of `parent` node into the middle of `edge` would create a directed cycle in `net`, i.e. not a DAG. The proposed hybrid @@ -243,7 +242,7 @@ Does *not* modify the network. Output: `true` if a conflict would arise (non-DAG), `false` if no conflict. """ -function directionalconflict(net::HybridNetwork, parent::Node, edge2::Edge, hybridpartnernew::Bool) +function directionalconflict(parent::Node, edge2::Edge, hybridpartnernew::Bool) if hybridpartnernew # all edges would retain their directions: use isChild1 fields c2 = getchild(edge2) return parent === c2 || isdescendant(parent, c2) diff --git a/src/addHybrid_snaq.jl b/src/addHybrid_snaq.jl index 469f0e9c..bda76f24 100644 --- a/src/addHybrid_snaq.jl +++ b/src/addHybrid_snaq.jl @@ -141,9 +141,10 @@ chooseEdgesGamma(net::HybridNetwork) = chooseEdgesGamma(net, false, net.edge) chooseEdgesGamma(net::HybridNetwork, blacklist::Bool) = chooseEdgesGamma(net, blacklist, net.edge) # aux function for addHybridization -# that takes the output edge1, edge2, gamma from -# chooseEdgesGamma and created necessary edges +# that takes the output edge1, edge2. # returns edge3, edge4, and adjusts edge1, edge2 to shorter length +# fixit: problem if edge1 or edge2 have a missing length, coded as -1.0. +# would be best to set lengths of e3, e4 to 0.0, and leave lengths of e1,e2 unchanged function parameters4createHybrid!(edge1::Edge, edge2::Edge,net::HybridNetwork) max_edge = maximum([e.number for e in net.edge]); t1 = rand()*edge1.length; @@ -343,21 +344,21 @@ end addHybridizationUpdateSmart!(net::HybridNetwork, N::Integer) = addHybridizationUpdateSmart!(net, false,N) -# ----------------------------------- add alternative hybridizations found in bootstrap ------------------------------------ +# --- add alternative hybridizations found in bootstrap """ addAlternativeHybridizations!(net::HybridNetwork, BSe::DataFrame; cutoff=10::Number, top=3::Int) -Modify the network `net` (the best network estimated with snaq) by adding other hybridizations -that are present in the bootstrap networks. By default, it will only consider hybrid edges with -more than 10% bootstrap support (`cutoff`) and it will only include the three top hybridizations -(`top`) sorted by bootstrap support. +Modify the network `net` (the best estimated network) by adding some of +the hybridizations present in the bootstrap networks. By default, it will only +add hybrid edges with more than 10% bootstrap support (`cutoff`) and it will +only include the top 3 hybridizations (`top`) sorted by bootstrap support. -The function also modifies the dataframe `BSe`. In the original `BSe`, +The dataframe `BSe` is also modified. In the original `BSe`, supposedly obtained with `hybridBootstrapSupport`, hybrid edges that do not appear in the best network have a missing number. -After the hybrid edges are added with `addAlternativeHybridizations`, `BSe` is modified to include the -edge numbers of the newly added hybrid edges. +After hybrid edges from bootstrap networks are added, +`BSe` is modified to include the edge numbers of the newly added hybrid edges. To distinguish hybrid edges present in the original network versus new edges, an extra column of true/false values is also added to `BSe`, named "alternative", with true for newly added edges absent from the original network. @@ -367,76 +368,109 @@ major tree topology. # example -```julia -bootnet = readMultiTopology("bootstrap-networks.txt") -bestnet = readTopology("best.tre") -BSn, BSe, BSc, BSgam, BSedgenum = hybridBootstrapSupport(bootnet, bestnet); -addAlternativeHybridizations!(bestnet,BSe) -using PhyloPlots -plot(bestnet, edgelabel=BSe[[:edge,:BS_hybrid_edge]]) +```jldoctest +julia> bootnet = readMultiTopology(joinpath(dirname(pathof(PhyloNetworks)), "..","examples", "bootsnaq.out")); # vector of 10 networks + +julia> bestnet = readTopology("((O,(E,#H7:::0.196):0.314):0.332,(((A)#H7:::0.804,B):10.0,(C,D):10.0):0.332);"); + +julia> BSn, BSe, BSc, BSgam, BSedgenum = hybridBootstrapSupport(bootnet, bestnet); + +julia> BSe[1:6,[:edge,:hybrid_clade,:sister_clade,:BS_hybrid_edge]] +6×4 DataFrame + Row │ edge hybrid_clade sister_clade BS_hybrid_edge + │ Int64? String String Float64 +─────┼───────────────────────────────────────────────────── + 1 │ 7 H7 B 33.0 + 2 │ 3 H7 E 32.0 + 3 │ missing c_minus3 c_minus8 44.0 + 4 │ missing c_minus3 H7 44.0 + 5 │ missing E O 12.0 + 6 │ missing c_minus6 c_minus8 9.0 + +julia> PhyloNetworks.addAlternativeHybridizations!(bestnet, BSe) + +julia> BSe[1:6,[:edge,:hybrid_clade,:sister_clade,:BS_hybrid_edge,:alternative]] +6×5 DataFrame + Row │ edge hybrid_clade sister_clade BS_hybrid_edge alternative + │ Int64? String String Float64 Bool +─────┼────────────────────────────────────────────────────────────────── + 1 │ 7 H7 B 33.0 false + 2 │ 3 H7 E 32.0 false + 3 │ 16 c_minus3 c_minus8 44.0 true + 4 │ 19 c_minus3 H7 44.0 true + 5 │ 22 E O 12.0 true + 6 │ missing c_minus6 c_minus8 9.0 false + +julia> # using PhyloPlots; plot(bestnet, edgelabel=BSe[:,[:edge,:BS_hybrid_edge]]); ``` """ function addAlternativeHybridizations!(net::HybridNetwork,BSe::DataFrame; cutoff=10::Number,top=3::Int) top > 0 || error("top must be greater than 0") - BSe[:alternative] = falses(length(BSe[:hybrid])) - newBSe = BSe[BSe[:BS_hybrid_edge] .> cutoff,:] - newBSe = newBSe[.!ismissing.(newBSe[:hybrid]) & .!ismissing.(newBSe[:sister]),:] - newBSe = newBSe[ismissing.(newBSe[:edge]),:] - newHyb = newBSe[1:top,:] - - if(size(newHyb,1) == 0) - @warn "Did not find any alternative hybridizations with bootstrap support greater than the cutoff, so nothign added" + BSe[!,:alternative] = falses(nrow(BSe)) + newBSe = subset(BSe, + :BS_hybrid_edge => x -> x.> cutoff, :edge => ByRow( ismissing), + :hybrid => ByRow(!ismissing), :sister => ByRow(!ismissing), + ) + top = min(top,nrow(newBSe)) + if top==0 + @info "no alternative hybridizations with support > cutoff $cutoff%, so nothing added." return end - for i in 1:top - hybnum = newHyb[:hybrid][i] - sisnum = newHyb[:sister][i] - edgenum = addHybridBetweenClades!(hybnum,sisnum,net) - ind1 = findall(x->!ismissing(x) && x==hybnum,BSe[:hybrid]) - ind2 = findall(x->!ismissing(x) && x==sisnum,BSe[:sister]) + hybnum = newBSe[i,:hybrid] + sisnum = newBSe[i,:sister] + edgenum = addHybridBetweenClades!(net, hybnum, sisnum) + if isnothing(edgenum) + @warn "cannot add desired hybrid (BS=$(newBSe[i,:BS_hybrid_edge])): the network would have a directed cycle" + continue + end + ind1 = findall(x->!ismissing(x) && x==hybnum, BSe[!,:hybrid]) + ind2 = findall(x->!ismissing(x) && x==sisnum, BSe[!,:sister]) ind = intersect(ind1,ind2) - BSe[ind,:edge] = edgenum - BSe[ind,:alternative] = true + BSe[ind,:edge] .= edgenum + BSe[ind,:alternative] .= true end end -## function to a hybrid edge between hybrid clade (hybnum = node number) and sister clade (sisnum = node number) -## the function finds the parent edges to these nodes, and puts a hybrid edge between them -## the function modifies net, and it returns the number of the minor hybrid edge added -function addHybridBetweenClades!(hybnum::Number,sisnum::Number,net::HybridNetwork) - hybind = getIndexNode(hybnum,net) - sisind = getIndexNode(sisnum,net) +""" + addHybridBetweenClades!(net::HybridNetwork, hybnum::Number, sisnum::Number) - ## hybridization ed1->ed2 - edge1 = getparentedge(net.node[sisind]) # major parent edges - edge2 = getparentedge(net.node[hybind]) +Modify `net` by adding a minor hybrid edge from "donor" to "recipient", +where "donor" is the major parent edge `e1` of node number `hybnum` and +"recipient" is the major parent edge `e2` of node number `sisnum`. +The new nodes are currently inserted at the middle of these parent edges. - edge3,edge4 = parameters4createHybrid!(edge1, edge2,net) - hybridnode = createHybrid!(edge1, edge2, edge3, edge4, net, 0.1) ## gamma=0.1, fixed later - if(edge1.length < 0) - setBranchLength!(edge1,-1.0) - setBranchLength!(edge3,-1.0) - end - if(edge2.length < 0) - setBranchLength!(edge2,-1.0) - setBranchLength!(edge4,-1.0) - end +If a hybrid edge from `e1` to `e2` would create a directed cycle in the network, +then this hybrid cannot be added. +In that case, the donor edge `e1` is moved up if its parent is a hybrid node, +to ensure that the sister clade to the new hybrid would be a desired (the +descendant taxa from `e1`) and a new attempt is made to create a hybrid edge. - if(edge2.isChild1) - edge4.hybrid = true - setGamma!(edge4,0.9) - edge4.isChild1 = true - else - edge2.hybrid = true - setGamma!(edge2,0.9) - edge2.isChild1 = false +Output: number of the new hybrid edge, or `nothing` if the desired hybridization +is not possible. + +See also: +[`addhybridedge!`](@ref) (used by this method) and +[`directionalconflict`](@ref) to check that `net` would still be a DAG. +""" +function addHybridBetweenClades!(net::HybridNetwork, hybnum::Number, sisnum::Number) + hybind = getIndexNode(hybnum,net) + sisind = getIndexNode(sisnum,net) + e1 = getparentedge(net.node[sisind]) # major parent edges + e2 = getparentedge(net.node[hybind]) + p1 = getparent(e1) + if directionalconflict(p1, e2, true) # then: first try to move the donor up + # so long as the descendant taxa (= sister clade) remain the same + while p1.hybrid + e1 = getparentedge(p1) # major parent edge: same descendant taxa + p1 = getparent(e1) + end + directionalconflict(p1, e2, true) && return nothing end - ## used gamma=0.1 to make the new edge a minor edge, but we really do not have gamma value: - emaj = getparentedge(hybridnode) - emaj.gamma = -1 - e = getparentedgeminor(hybridnode) - e.gamma = -1 - return e.number + hn, he = addhybridedge!(net, e1, e2, true) # he: missing length & gamma by default + # ideally: add option "where" to breakedge!, used by addhybridedge! + # so as to place the new nodes at the base of each clade. + # currently: the new nodes are inserted at the middle of e1 and e2. + return he.number end diff --git a/src/auxiliary.jl b/src/auxiliary.jl index 7a2fb1e1..84e36bc4 100644 --- a/src/auxiliary.jl +++ b/src/auxiliary.jl @@ -576,11 +576,13 @@ end """ deleteNode!(net::HybridNetwork, n::Node) + deleteNode!(net::QuartetNetwork, n::Node) Delete node `n` from a network, i.e. removes it from net.node, and from net.hybrid or net.leaf as appropriate. Update attributes `numNodes`, `numTaxa`, `numHybrids`. -Warning: `net.names` is *not* updated. +Warning: `net.names` is *not* updated, and this is a feature (not a bug) +for networks of type QuartetNetwork. Warning: if the root is deleted, the new root is arbitrarily set to the first node in the list. This is intentional to save time because this function @@ -606,13 +608,6 @@ function deleteNode!(net::HybridNetwork, n::Node) end end -# function to delete a Node in net.node and -# update numNodes and numTaxa for QuartetNetwork -# if hybrid node, it deletes also from net.hybrid -# and updates numHybrids -# note that net.names is never updated to keep it -# accurate -# if n is leaf, we delete from qnet.leaf function deleteNode!(net::QuartetNetwork, n::Node) index = findfirst(no -> no.number == n.number, net.node) # isEqual (from above) checks for more than node number diff --git a/src/deprecated.jl b/src/deprecated.jl index 4c2691d5..d702ef2f 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -14,20 +14,21 @@ function Base.getproperty(mm::PhyloNetworkLinearModel, f::Symbol) if f === :model - Base.depwarn("accessing the `model` field of PhyloNetworkLinearModel.jl models is deprecated, " * - "as they are no longer wrapped in a `TableRegressionModel` " * - "and can be used directly now", :getproperty) + Base.depwarn("""accessing the 'model' field of PhyloNetworkLinearModel's is + deprecated, as they are no longer wrapped in a TableRegressionModel. + They can be used directly now.""", + :getproperty) # force=true to show warning. currently silent by default return mm elseif f === :mf - Base.depwarn("accessing the `mf` field of PhyloNetworkLinearModel.jl models is deprecated, " * - "as they are no longer wrapped in a `TableRegressionModel`." * - "Use `formula(m)` to access the model formula.", :getproperty) + Base.depwarn("""accessing the 'mf' field of PhyloNetworkLinearModel's is + deprecated, as they are no longer wrapped in a TableRegressionModel. + Use `formula(m)` to access the model formula.""", :getproperty) form = formula(mm) return ModelFrame{Nothing, typeof(mm)}(form, nothing, nothing, typeof(mm)) elseif f === :mm - Base.depwarn("accessing the `mm` field of PhyloNetworkLinearModel.jl models is deprecated, " * - "as they are no longer wrapped in a `TableRegressionModel`." * - "Use `modelmatrix(m)` to access the model matrix.", :getproperty) + Base.depwarn("""accessing the 'mm' field of PhyloNetworkLinearModel's is + deprecated, as they are no longer wrapped in a TableRegressionModel. + Use `modelmatrix(m)` to access the model matrix.""", :getproperty) modmatr = modelmatrix(mm) return ModelMatrix{typeof(modmatr)}(modmatr, Int[]) else diff --git a/src/types.jl b/src/types.jl index 99117424..bff62c6c 100644 --- a/src/types.jl +++ b/src/types.jl @@ -342,7 +342,6 @@ mutable struct QuartetNetwork <: Network function QuartetNetwork(net::HybridNetwork) net2 = deepcopy(net); #fixit: maybe we dont need deepcopy of all, maybe only arrays new(net2.numTaxa,net2.numNodes,net2.numEdges,net2.node,net2.edge,net2.hybrid,net2.leaf,net2.numHybrids, [true for e in net2.edge],[],-1,[], -1.,net2.names,Int8[-1,-1,-1,-1],Int8[-1,-1,-1],[0,0,0],[],true,[]) - #new(sum([n.leaf?1:0 for n in net.node]),size(net.node,1),size(net.edge,1),copy(net.node),copy(net.edge),copy(net.hybrid),size(net.hybrid,1), [true for e in net2.edge],[],-1,[],-1.,net2.names,[-1,-1,-1,-1],[-1,-1,-1],[],true,[]) end QuartetNetwork() = new(0,0,0,[],[],[],[],0,[],[],-1,[],-1.0,[],[],[],[],[],true,[]) end diff --git a/test/test_addHybrid.jl b/test/test_addHybrid.jl index 9849835a..c213d8fc 100644 --- a/test/test_addHybrid.jl +++ b/test/test_addHybrid.jl @@ -70,11 +70,11 @@ netl1 = readTopology(str_level1); # directional: throws error if the new network would not be a DAG, e.g. if edge 1 is a directed descendant of edge 2 # case 6 nodeS145 = PhyloNetworks.getparent(netl1.edge[6]) -@test PhyloNetworks.directionalconflict(netl1, nodeS145, netl1.edge[15], true) +@test PhyloNetworks.directionalconflict(nodeS145, netl1.edge[15], true) # case 2 -@test PhyloNetworks.directionalconflict(netl1, nodeS145, netl1.edge[18], true) +@test PhyloNetworks.directionalconflict(nodeS145, netl1.edge[18], true) # case 3 (bad hybrid edge choice leads to a nonDAG) -@test PhyloNetworks.directionalconflict(netl1, nodeS145, netl1.edge[20], true) -@test PhyloNetworks.directionalconflict(netl1, nodeS145, netl1.edge[4], false) -@test !PhyloNetworks.directionalconflict(netl1, nodeS145, netl1.edge[4], true) +@test PhyloNetworks.directionalconflict(nodeS145, netl1.edge[20], true) +@test PhyloNetworks.directionalconflict(nodeS145, netl1.edge[4], false) +@test !PhyloNetworks.directionalconflict(nodeS145, netl1.edge[4], true) end # of edge checking functions diff --git a/test/test_bootstrap.jl b/test/test_bootstrap.jl index 16752a01..938edd07 100644 --- a/test/test_bootstrap.jl +++ b/test/test_bootstrap.jl @@ -11,7 +11,7 @@ bootnet = readMultiTopology(joinpath(exdir,"fish3hyb_20boostrap.net")); # include(string(home, "bootstrap.jl")) resn, rese, resc, gam, edgenum = hybridBootstrapSupport(bootnet,bestnet); #@show resn; @show rese; showall(gam); @show edgenum; resc -# plot(bestnet, showIntNodeNumber=true) +# plot(bestnet, shownodenumber=true); @test resn[1:2,:clade] == ["H26","H25"] @test resn[1:2,:BS_hybrid_samesisters] == [25.0,100.0] @@ -35,6 +35,11 @@ resn, rese, resc, gam, edgenum = hybridBootstrapSupport(bootnet,bestnet); @test gam[:,2] == [.0,.0,.192,.0,.0,.0,.0,.0,.193,.0,.184,.193,.0,.0,.0,.0,.0,.193,.0,.0] @test gam[:,4] == [.165,.166,.165,.166,.165,.165,.166,.165,.165,.166,.164,.166,.166,.165,.165,.165,.166,.165,.166,.166] @test edgenum ==[25,39,43,7] + +PhyloNetworks.addAlternativeHybridizations!(bestnet, rese, cutoff=4) +@test rese[[5,10,11],:edge] == [54,57,60] +@test ismissing(rese[6,:edge]) +@test length(bestnet.hybrid) == 5 end # of testset, hybridBootstrapSupport @testset "bootsnaq from quartet CF intervals" begin