diff --git a/Project.toml b/Project.toml index 78c85a68..51f36bf0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PhyloNetworks" uuid = "33ad39ac-ed31-50eb-9b15-43d0656eaa72" license = "MIT" -version = "0.16.2" +version = "0.16.3" [deps] BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" diff --git a/docs/Project.toml b/docs/Project.toml index c1dd2fc4..6d939b98 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,7 +3,6 @@ BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" PhyloNetworks = "33ad39ac-ed31-50eb-9b15-43d0656eaa72" PhyloPlots = "c0d5b6db-e3fc-52bc-a87d-1d050989ed3b" RCall = "6f49c342-dc21-5d91-9882-a32aef131414" @@ -11,5 +10,5 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] -Documenter = "~0.27" +Documenter = "~1" PhyloPlots = "1" diff --git a/docs/make.jl b/docs/make.jl index 88a404ce..5c0fac64 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -using Documenter, DocumenterMarkdown +using Documenter using Pkg Pkg.add(PackageSpec(name="PhyloPlots", rev="master")) @@ -11,7 +11,13 @@ makedocs( sitename = "PhyloNetworks.jl", authors = "Claudia Solís-Lemus, Cécile Ané, Paul Bastide and contributors.", modules = [PhyloNetworks], # to list methods from PhyloNetworks only, not from Base etc. - format = Documenter.HTML(prettyurls = get(ENV, "CI", nothing) == "true"), # easier local build + format = Documenter.HTML( + prettyurls = get(ENV, "CI", nothing) == "true", # easier local build + size_threshold = 600 * 2^10, size_threshold_warn = 500 * 2^10), # 600 KiB + # exception, so warning-only for :missing_docs. List all others: + warnonly = Documenter.except(:autodocs_block, :cross_references, :docs_block, + :doctest, :eval_block, :example_block, :footnote, :linkcheck_remotes, + :linkcheck, :meta_block, :parse_error, :setup_block), pages = [ "Home" => "index.md", "Manual" => [ diff --git a/src/auxiliary.jl b/src/auxiliary.jl index 84e36bc4..5cbd728f 100644 --- a/src/auxiliary.jl +++ b/src/auxiliary.jl @@ -481,17 +481,6 @@ function getIndexHybrid(node::Node, net::Network) return i end -# function that given a hybrid node, it gives you the minor hybrid edge -# warning: assumes level-1 network: see getparentedgeminor for a general network -function getHybridEdge(node::Node) - node.hybrid || error("node $(node.number) is not hybrid node, cannot get hybrid edges") - a = nothing; - for e in node.edge - (e.hybrid && !e.isMajor) ? a = e : nothing; # assumes level-1: child of hybrid node must be a tree edge - end - isa(a,Nothing) ? error("hybrid node $(node.number) does not have minor hybrid edge, edges: $([e.number for e in node.edge])") : return a -end - # function that given two nodes, it gives you the edge that connects them # returns error if they are not connected by an edge @@ -684,36 +673,24 @@ end # function to delete an internal node with only 2 edges function deleteIntNode!(net::Network, n::Node) size(n.edge,1) == 2 || error("node $(n.number) does not have only two edges") -# isEqual(n,net.node[net.root]) && println("deleting the root $(n.number) because it has only two edges attached") index = n.edge[1].number < n.edge[2].number ? 1 : 2; - edge1 = n.edge[index]; - edge2 = n.edge[index==1 ? 2 : 1]; - if(!edge1.hybrid && !edge2.hybrid) - node1 = getOtherNode(edge1,n); - node2 = getOtherNode(edge2,n); - removeEdge!(node2,edge2); - removeNode!(n,edge1); - setEdge!(node2,edge1); - setNode!(edge1,node2); - deleteNode!(net,n); - deleteEdge!(net,edge2); - else - @warn "the two edges $([edge1.number,edge2.number]) attached to node $(n.number) must be tree edges to delete node" - if(edge1.hybrid) - hybedge = edge1 - otheredge = edge2 - elseif(edge2.hybrid) - hybedge = edge2 - otheredge = edge1 + edge1 = n.edge[index]; # edge1 will be kept + edge2 = n.edge[index==1 ? 2 : 1] # we will delete edge2 and n, except if edge2 is hybrid + if edge2.hybrid + (edge2, edge1) = (edge1, edge2) + if getchild(edge1) === n || edge2.hybrid + @error "node with incoming hybrid edge or incident to 2 hybrid edges: will not be removed" + return nothing end - othernode = getOtherNode(otheredge,n) - removeNode!(n,hybedge) - removeEdge!(othernode,otheredge) - setEdge!(othernode,hybedge) - setNode!(hybedge,othernode) - deleteNode!(net,n) - deleteEdge!(net,otheredge) end + node2 = getOtherNode(edge2,n) + removeEdge!(node2,edge2) + removeNode!(n,edge1) + setEdge!(node2,edge1) + setNode!(edge1,node2) + deleteNode!(net,n) + deleteEdge!(net,edge2) + return nothing end diff --git a/src/deleteHybrid.jl b/src/deleteHybrid.jl index 50b860fe..ab5254f9 100644 --- a/src/deleteHybrid.jl +++ b/src/deleteHybrid.jl @@ -16,8 +16,8 @@ function identifyInCycle(net::Network,node::Node) node.hybrid || error("node $(node.number) is not hybrid, cannot identifyInCycle") start = node; - hybedge = getHybridEdge(node); - last = getOtherNode(hybedge,node); + hybedge = getparentedgeminor(node) + lastnode = getOtherNode(hybedge,node) dist = 0; queue = PriorityQueue(); path = Node[]; @@ -33,32 +33,19 @@ function identifyInCycle(net::Network,node::Node) return true, net.edges_changed, net.nodes_changed else curr = dequeue!(queue); - if(isEqual(curr,last)) + if isEqual(curr,lastnode) found = true; push!(path,curr); - else - if(!net.visited[getIndex(curr,net)]) - net.visited[getIndex(curr,net)] = true; - if(isEqual(curr,start)) - for e in curr.edge - if(!e.hybrid || e.isMajor) - other = getOtherNode(e,curr); - other.prev = curr; - dist = dist+1; - enqueue!(queue,other,dist); - end - end - else - for e in curr.edge - if(!e.hybrid || e.isMajor) - other = getOtherNode(e,curr); - if(!other.leaf && !net.visited[getIndex(other,net)]) - other.prev = curr; - dist = dist+1; - enqueue!(queue,other,dist); - end - end - end + elseif !net.visited[getIndex(curr,net)] + net.visited[getIndex(curr,net)] = true + atstart = isEqual(curr,start) + for e in curr.edge + e.isMajor || continue + other = getOtherNode(e,curr) + if atstart || (!other.leaf && !net.visited[getIndex(other,net)]) + other.prev = curr + dist = dist+1 + enqueue!(queue,other,dist) end end end @@ -149,17 +136,9 @@ function deleteHybridizationUpdate!(net::HybridNetwork, hybrid::Node, random::Bo push!(edgesRoot, edges[2]) undoContainRoot!(edgesRoot); end - @debug begin - msg = "" - if edges[1].gamma < 0.5 - msg = "strange major hybrid edge $(edges[1].number) with gamma $(edges[1].gamma) less than 0.5" - end - if edges[1].gamma == 1.0 - msg = "strange major hybrid edge $(edges[1].number) with gamma $(edges[1].gamma) equal to 1.0" - end - msg - end limit = edges[1].gamma + @debug (limit < 0.5 ? "strange major hybrid edge $(edges[1].number) with γ $limit < 0.5" : + (limit == 1.0 ? "strange major hybrid edge $(edges[1].number) with γ = $limit" : "")) if(random) minor = rand() < limit ? false : true else diff --git a/src/manipulateNet.jl b/src/manipulateNet.jl index c5747559..c8f7caa9 100644 --- a/src/manipulateNet.jl +++ b/src/manipulateNet.jl @@ -95,18 +95,16 @@ end """ hybridatnode!(net::HybridNetwork, nodeNumber::Integer) - hybridatnode(net, nodeNumber) -Change the status of edges in network `net`, +Change the direction and status of edges in network `net`, to move the hybrid node in a cycle to the node with number `nodeNumber`. This node must be in one (and only one) cycle, otherwise an error will be thrown. +Check and update the nodes' field `inCycle`. -The second method does not modify `net`, checks that it's of level 1, and -returns the new network after hybrid modification. +Output: `net` after hybrid modification. -`net` is assumed to be of level 1, that is, each blob has a +Assumption: `net` must be of level 1, that is, each blob has a single cycle with a single reticulation. -Check and update the nodes' field `inCycle`. # example @@ -150,7 +148,7 @@ Move the reticulation from `hybrid` to `newNode`, which must in the same cycle. `net` is assumed to be of level 1, but **no checks** are made and fields are supposed up-to-date. -Called by `hybridatnode!(net, node number)`, which is itself +Called by `hybridatnode!(net, nodenumber)`, which is itself called by [`undirectedOtherNetworks`](@ref). """ function hybridatnode!(net::HybridNetwork, hybrid::Node, newNode::Node) @@ -182,7 +180,13 @@ end # does not call hybridatnode! but repeats its code: oops! violates DRY principle # nodeNumber should correspond to the number assigned by readTopologyLevel1, # and the node numbers in `net` are irrelevant. -@doc (@doc hybridatnode!) hybridatnode +""" + hybridatnode(net::HybridNetwork, nodeNumber::Integer) + +Move the hybrid node in a cycle to make node number `nodeNumber` a hybrid node +Compared to [`hybridatnode!`], this method checks that `net` is of level 1 +(required) and does not modify it. +""" function hybridatnode(net0::HybridNetwork, nodeNumber::Integer) net = readTopologyLevel1(writeTopologyLevel1(net0)) # we need inCycle attributes ind = 0 diff --git a/src/readwrite.jl b/src/readwrite.jl index 0e83ece8..cfb8393e 100644 --- a/src/readwrite.jl +++ b/src/readwrite.jl @@ -750,16 +750,11 @@ function cleanAfterRead!(net::HybridNetwork, leaveRoot::Bool) nodes = copy(net.node) for n in nodes if isNodeNumIn(n,net.node) # very important to check - if size(n.edge,1) == 2 - if !n.hybrid - if !leaveRoot || !isEqual(net.node[net.root],n) #if n is the root - deleteIntNode!(net,n); - end - else - hyb = count([e.hybrid for e in n.edge]); - if hyb == 1 - deleteIntNode!(net,n); - end + if size(n.edge,1) == 2 # delete n if: + if (!n.hybrid && (!leaveRoot || !isEqual(net.node[net.root],n)) || + (n.hybrid && sum(e.hybrid for e in n.edge) == 1)) + deleteIntNode!(net,n) + continue # n was deleted: skip the rest end end if !n.hybrid diff --git a/src/traitsLikDiscrete.jl b/src/traitsLikDiscrete.jl index 92f4f830..f69fd982 100644 --- a/src/traitsLikDiscrete.jl +++ b/src/traitsLikDiscrete.jl @@ -133,7 +133,7 @@ The `rvsymbol` should be as required by [`RateVariationAcrossSites`](@ref). The network's gamma values are modified if they are missing. After that, a deep copy of the network is passed to the inner constructor. """ -function StatisticalSubstitutionModel(net::HybridNetwork, fastafile::String, +function StatisticalSubstitutionModel(net::HybridNetwork, fastafile::AbstractString, modsymbol::Symbol, rvsymbol=:noRV::Symbol, ratecategories=4::Int; maxhybrid=length(net.hybrid)::Int) for e in net.edge # check for missing or inappropriate γ values @@ -449,14 +449,14 @@ end #species, dat version, no ratemodel function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, - species::Array{String}, dat::DataFrame; kwargs...) + species::Array{<:AbstractString}, dat::DataFrame; kwargs...) ratemodel = RateVariationAcrossSites(ncat=1) fitdiscrete(net, model, ratemodel, species, dat; kwargs...) end #species, dat version with ratemodel function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, - ratemodel::RateVariationAcrossSites, species::Array{String}, + ratemodel::RateVariationAcrossSites, species::Array{<:AbstractString}, dat::DataFrame; kwargs...) dat2 = traitlabels2indices(dat, model) # vec of vec, indices o, net = check_matchtaxonnames!(copy(species), dat2, net) @@ -465,7 +465,7 @@ end #wrapper: species, dat version with model symbol function fitdiscrete(net::HybridNetwork, modSymbol::Symbol, - species::Array{String}, dat::DataFrame, rvSymbol=:noRV::Symbol; kwargs...) + species::Array{<:AbstractString}, dat::DataFrame, rvSymbol=:noRV::Symbol; kwargs...) rate = startingrate(net) labels = learnlabels(modSymbol, dat) if modSymbol == :JC69 @@ -498,7 +498,7 @@ end #dnadata with dnapatternweights version with ratemodel function fitdiscrete(net::HybridNetwork, model::SubstitutionModel, ratemodel::RateVariationAcrossSites,dnadata::DataFrame, - dnapatternweights::Array{Float64}; kwargs...) + dnapatternweights::Array{<:AbstractFloat}; kwargs...) dat2 = traitlabels2indices(dnadata[!,2:end], model) o, net = check_matchtaxonnames!(dnadata[:,1], dat2, net) kwargs = (:siteweights => dnapatternweights, kwargs...) @@ -508,7 +508,7 @@ end #wrapper for dna data function fitdiscrete(net::HybridNetwork, modSymbol::Symbol, dnadata::DataFrame, - dnapatternweights::Array{Float64}, rvSymbol=:noRV::Symbol; kwargs...) + dnapatternweights::Array{<:AbstractFloat}, rvSymbol=:noRV::Symbol; kwargs...) rate = startingrate(net) if modSymbol == :JC69 model = JC69([1.0], true) # 1.0 instead of rate because relative version @@ -580,7 +580,7 @@ function fit!(obj::SSM; optimizeQ=true::Bool, optimizeRVAS=true::Bool, return obj end if optimizeQ - function loglikfun(x::Vector{Float64}, grad::Vector{Float64}) # modifies obj + function loglikfun(x::Vector{<:AbstractFloat}, grad::Vector{<:AbstractFloat}) # modifies obj setrates!(obj.model, x) res = discrete_corelikelihood!(obj) verbose && println("loglik: $res, model rates: $x") @@ -609,7 +609,7 @@ function fit!(obj::SSM; optimizeQ=true::Bool, optimizeRVAS=true::Bool, verbose && println("got $(round(fmax, digits=5)) at $(round.(xmax, digits=5)) after $(optQ.numevals) iterations (return code $(ret))") end if optimizeRVAS - function loglikfunRVAS(alpha::Vector{Float64}, grad::Vector{Float64}) + function loglikfunRVAS(alpha::Vector{<:AbstractFloat}, grad::Vector{<:AbstractFloat}) setparameters!(obj.ratemodel, alpha) res = discrete_corelikelihood!(obj) verbose && println("loglik: $res, rate variation model shape parameter alpha: $(alpha[1])") diff --git a/src/update.jl b/src/update.jl index a952ebc8..85ddfe21 100644 --- a/src/update.jl +++ b/src/update.jl @@ -21,95 +21,63 @@ # unlike updateInCycle recursive, but it is expected # to be much faster function updateInCycle!(net::HybridNetwork,node::Node) - if(node.hybrid) - start = node; - node.inCycle = node.number; - node.k = 1; - hybedge = getHybridEdge(node); - hybedge.inCycle = node.number; - last = getOtherNode(hybedge,node); - dist = 0; - queue = PriorityQueue(); - path = Node[]; - net.edges_changed = Edge[]; - net.nodes_changed = Node[]; - push!(net.edges_changed,hybedge); - push!(net.nodes_changed,node); - found = false; - net.visited = [false for i = 1:size(net.node,1)]; - enqueue!(queue,node,dist); -# println("start is $(start.number), last is $(last.number)") - while(!found) - if(isempty(queue)) - return false, true, net.edges_changed, net.nodes_changed - else - #println("at this moment, queue is $([n.number for n in queue])") - curr = dequeue!(queue); - # println("curr $(curr.number)") - if(isEqual(curr,last)) - # println("encuentra a la last $(curr.number)") - found = true; - push!(path,curr); - else - if(!net.visited[getIndex(curr,net)]) - # println("curr not visited $(curr.number)") - net.visited[getIndex(curr,net)] = true; - if(isEqual(curr,start)) - #println("curr is start") - for e in curr.edge - if(!e.hybrid || e.isMajor) - other = getOtherNode(e,curr); - # println("other is $(other.number), pushed to queue") - other.prev = curr; - dist = dist+1; - enqueue!(queue,other,dist); - end - end - else - for e in curr.edge - if(!e.hybrid || e.isMajor) - other = getOtherNode(e,curr); - # println("other is $(other.number)") - if(!other.leaf && !net.visited[getIndex(other,net)]) - # println("dice que other $(other.number) no es leaf ni visited, lo mete a queue") - other.prev = curr; - dist = dist+1; - enqueue!(queue,other,dist); - end - end - end - end - end + node.hybrid || error("node is not hybrid") + start = node + node.inCycle = node.number + node.k = 1 + hybedge = getparentedgeminor(node) + hybedge.inCycle = node.number + lastnode = getOtherNode(hybedge,node) + dist = 0 + queue = PriorityQueue() + path = Node[] + net.edges_changed = Edge[] + net.nodes_changed = Node[] + push!(net.edges_changed,hybedge) + push!(net.nodes_changed,node) + found = false + net.visited = falses(length(net.node)) + enqueue!(queue,node,dist) + while !found + if isempty(queue) + return false, true, net.edges_changed, net.nodes_changed + end + curr = dequeue!(queue) + if isEqual(curr,lastnode) + found = true + push!(path,curr) + elseif !net.visited[getIndex(curr,net)] + net.visited[getIndex(curr,net)] = true + atstart = isEqual(curr,start) + for e in curr.edge + e.isMajor || continue + other = getOtherNode(e,curr) + if atstart || (!other.leaf && !net.visited[getIndex(other,net)]) + other.prev = curr + dist = dist+1 + enqueue!(queue,other,dist) end end - end # end while - # println("after while, path has $([n.number for n in path])") - curr = pop!(path); - # println("path should be empty here: $(isempty(path))") - while(!isEqual(curr, start)) - # println("curr es $(curr.number)") - if(curr.inCycle!= -1) - push!(path,curr); - curr = curr.prev; - else - curr.inCycle = start.number; - push!(net.nodes_changed, curr); - node.k = node.k + 1; - edge = getConnectingEdge(curr,curr.prev); - edge.inCycle = start.number; - push!(net.edges_changed, edge); - curr = curr.prev; - end end - if(!isempty(path) || node.k<3) - @debug "warning: new cycle intersects existing cycle" - return false, false, net.edges_changed, net.nodes_changed + end # end while + curr = pop!(path) + while !isEqual(curr, start) + if curr.inCycle!= -1 + push!(path,curr) + curr = curr.prev else - return true, false, net.edges_changed, net.nodes_changed + curr.inCycle = start.number + push!(net.nodes_changed, curr) + node.k = node.k + 1 + edge = getConnectingEdge(curr,curr.prev) + edge.inCycle = start.number + push!(net.edges_changed, edge) + curr = curr.prev end - else - error("node is not hybrid") end + flag = isempty(path) # || node.k<3 + !flag && @debug "warning: new cycle intersects existing cycle" + return flag, false, net.edges_changed, net.nodes_changed end """ @@ -218,7 +186,7 @@ function updateGammaz!(net::HybridNetwork, node::Node, allow::Bool) edge_maj, edge_min, tree_edge2 = hybridEdges(node); other_maj = getOtherNode(edge_maj,node); other_min = getOtherNode(edge_min,node); - node.k > 2 || return false, [] + node.k > 2 || error("cycle with only 2 nodes: parallel edges") # return false, [] if(node.k == 4) # could be bad diamond I,II # net.numTaxa >= 5 || return false, [] #checked inside optTopRuns now edgebla,edge_min2,tree_edge3 = hybridEdges(other_min); @@ -346,8 +314,6 @@ end # by nodesChanged vector (obtained from updateInCycle) # warning: needs updateInCycle for all hybrids before running this function updatePartition!(net::HybridNetwork, nodesChanged::Vector{Node}) - length(nodesChanged) > 2 || error("incycle with only 2 nodes in it after updateGammaz") - #println("nodesChanged are $([n.number for n in nodesChanged])") if(net.numHybrids == 0) net.partition = Partition[] end diff --git a/test/test_auxillary.jl b/test/test_auxillary.jl index fc22ef71..906b05a4 100644 --- a/test/test_auxillary.jl +++ b/test/test_auxillary.jl @@ -7,6 +7,10 @@ using CSV @testset "auxiliary" begin +@testset "read level 1: hyb edge at root then 2-cycle" begin +@test_throws "cycle with only 2 nodes" readTopologyLevel1("((t9,((t3,t2))#H12:::0.52),#H12:::0.48);") +end + @testset "setlengths and setgammas" begin originalstdout = stdout redirect_stdout(devnull) # requires julia v1.6 diff --git a/test/test_traitLikDiscrete.jl b/test/test_traitLikDiscrete.jl index 11302b68..5c4cd067 100644 --- a/test/test_traitLikDiscrete.jl +++ b/test/test_traitLikDiscrete.jl @@ -231,7 +231,7 @@ redirect_stdout(originalstdout) @test StatsBase.loglikelihood(fit2) ≈ -2.6447247349802496 atol=2e-4 m2.rate[:] = [0.2, 0.3]; dat2 = DataFrame(trait1= ["hi","hi","lo","lo","hi"], trait2=["hi",missing,"lo","hi","lo"]); -fit3 = (@test_logs fitdiscrete(net, m2, species, dat2; optimizeQ=false, optimizeRVAS=false)) +fit3 = (@test_logs fitdiscrete(net, m2, String7.(species), dat2; optimizeQ=false, optimizeRVAS=false)) @test fit3.loglik ≈ (-2.6754091090953693 - 2.1207856874033491) PhyloNetworks.fit!(fit3; optimizeQ=true, optimizeRVAS=false)