Skip to content

Commit

Permalink
for v0.16.2 (#200)
Browse files Browse the repository at this point in the history
- bug fix in writeTableCF: columns with taxon name not restricted to be 1:4
- new: posterior_loghybridweight for the posterior probability of a trait passed by "gene flow"
  • Loading branch information
cecileane authored May 11, 2023
1 parent e4cbb04 commit 524fcb8
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PhyloNetworks"
uuid = "33ad39ac-ed31-50eb-9b15-43d0656eaa72"
license = "MIT"
version = "0.16.1"
version = "0.16.2"

[deps]
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
Expand Down
67 changes: 34 additions & 33 deletions src/readData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,28 +162,28 @@ end

function readTableCF!(df::DataFrames.DataFrame; summaryfile=""::AbstractString, kwargs...)
@debug "assume the numbers for the taxon read from the observed CF table match the numbers given to the taxon when creating the object network"
taxoncolnames = [[:t1, :tx1, :tax1, :taxon1], [:t2, :tx2, :tax2, :taxon2],
[:t3, :tx3, :tax3, :taxon3], [:t4, :tx4, :tax4, :taxon4] ]
taxoncol = [findfirst(x-> x taxoncolnames[1], DataFrames.propertynames(df)),
findfirst(x-> x taxoncolnames[2], DataFrames.propertynames(df)),
findfirst(x-> x taxoncolnames[3], DataFrames.propertynames(df)),
findfirst(x-> x taxoncolnames[4], DataFrames.propertynames(df))]
alternativecolnames = [ # obsCF12 is as exported by fittedQuartetCF()
[:CF12_34, Symbol("CF12.34"), :obsCF12],
[:CF13_24, Symbol("CF13.24"), :obsCF13],
[:CF14_23, Symbol("CF14.23"), :obsCF14]
[:CF12_34, Symbol("CF12.34"), :obsCF12, :CF1234],
[:CF13_24, Symbol("CF13.24"), :obsCF13, :CF1324],
[:CF14_23, Symbol("CF14.23"), :obsCF14, :CF1423]
]
obsCFcol = [findfirst(x-> x alternativecolnames[1], DataFrames.propertynames(df)),
findfirst(x-> x alternativecolnames[2], DataFrames.propertynames(df)),
findfirst(x-> x alternativecolnames[3], DataFrames.propertynames(df))]
ngenecol = findfirst(isequal(:ngenes), DataFrames.propertynames(df))
withngenes = ngenecol !== nothing
if nothing in obsCFcol # one or more col names for CFs were not found
size(df,2) == (withngenes ? 8 : 7) ||
@warn """Column names for quartet concordance factors (CFs) were not recognized.
Was expecting CF12_34, CF13_24 and CF14_23 for the columns with CF values,
or CF12.34 or obsCF12, etc.
Will assume that the first 4 columns give the taxon names, and that columns 5-7 give the CFs."""
obsCFcol = [5,6,7] # assuming CFs are in columns 5,6,7, with colname mismatch
end
minimum(obsCFcol) > 4 ||
error("CFs found in columns $obsCFcol, but taxon labels expected in columns 1-4")
# fixit: what about columns giving the taxon names: always assumed to be columns 1-4? No warning if not?
columns = [[1,2,3,4]; obsCFcol]
nothing in taxoncol && error("columns for taxon names were not found")
nothing in obsCFcol && error(
"""Could not identify columns with quartet concordance factors (qCFs).
Was expecting CF12_34, CF13_24 and CF14_23 for the columns with CF values,
or CF12.34 or obsCF12, etc.""")
columns = [taxoncol; obsCFcol]
if withngenes push!(columns, ngenecol) end

d = readTableCF!(df, columns; kwargs...)
Expand Down Expand Up @@ -661,15 +661,16 @@ data: [1.0, 0.0, 0.0, 0.5]
julia> df = writeTableCF(q,t); # to get a DataFrame that can be saved to a file later
julia> show(df, allcols=true)
5×8 DataFrame
Row │ t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
│ String String String String Float64 Float64 Float64 Float64
─────┼────────────────────────────────────────────────────────────────────
1 │ A B D E 0.25 0.25 0.5 2.0
2 │ A B D O 0.5 0.5 0.0 1.0
3 │ A B E O 1.0 0.0 0.0 0.5
4 │ A D E O 1.0 0.0 0.0 0.5
5 │ B D E O 0.0 0.0 0.0 0.0
5×9 DataFrame
Row │ qind t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
│ Int64 String String String String Float64 Float64 Float64 Float64
─────┼───────────────────────────────────────────────────────────────────────────
1 │ 1 A B D E 0.25 0.25 0.5 2.0
2 │ 2 A B D O 0.5 0.5 0.0 1.0
3 │ 3 A B E O 1.0 0.0 0.0 0.5
4 │ 4 A D E O 1.0 0.0 0.0 0.5
5 │ 5 B D E O 0.0 0.0 0.0 0.0
julia> # using CSV; CSV.write(df, "filename.csv");
julia> tree2 = readTopology("((A,(B,D)),E);");
Expand All @@ -680,15 +681,15 @@ Reading in trees, looking at 5 quartets in each...
**
julia> show(writeTableCF(q,t), allcols=true)
8 DataFrame
Row │ t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
│ String String String String Float64 Float64 Float64 Float64
─────┼───────────────────────────────────────────────────────────────────────
1 │ A B D E 0.333333 0.333333 0.333333 3.0
2 │ A B D O 0.5 0.5 0.0 2.0
3 │ A B E O 1.0 0.0 0.0 1.0
4 │ A D E O 1.0 0.0 0.0 1.0
5 │ B D E O 0.0 0.0 0.0 0.0
9 DataFrame
Row │ qind t1 t2 t3 t4 CF12_34 CF13_24 CF14_23 ngenes
Int64 String String String String Float64 Float64 Float64 Float64
─────┼──────────────────────────────────────────────────────────────────────────────
1 │ 1 A B D E 0.333333 0.333333 0.333333 3.0
2 │ 2 A B D O 0.5 0.5 0.0 2.0
3 │ 3 A B E O 1.0 0.0 0.0 1.0
4 │ 4 A D E O 1.0 0.0 0.0 1.0
5 │ 5 B D E O 0.0 0.0 0.0 0.0
```
"""
function countquartetsintrees(tree::Vector{HybridNetwork},
Expand Down
60 changes: 58 additions & 2 deletions src/traitsLikDiscrete.jl
Original file line number Diff line number Diff line change
Expand Up @@ -791,11 +791,15 @@ end
"""
posterior_logtreeweight(obj::SSM, trait = 1)
Return an array A such that A[t] = log of P(tree `t` and trait `trait`)
if a single `trait` is requested, or A[i,t]= log of P(tree `t` and trait `i`)
Array A of log-posterior probabilities for each tree displayed in the network:
such that A[t] = log of P(tree `t` | trait `trait`)
if a single `trait` is requested, or A[t,i]= log of P(tree `t` | trait `i`)
if `trait` is a vector or range (e.g. `trait = 1:obj.nsites`).
These probabilities are conditional on the model parameters in `obj`.
Displayed trees are listed in the order in which they are stored in the fitted
model object `obj`.
**Precondition**: `_loglikcache` updated by [`discrete_corelikelihood!`](@ref)
# examples
Expand Down Expand Up @@ -835,6 +839,58 @@ function posterior_logtreeweight(obj::SSM, trait = 1)
return ts
end

"""
posterior_loghybridweight(obj::SSM, hybrid_name, trait = 1)
posterior_loghybridweight(obj::SSM, edge_number, trait = 1)
Log-posterior probability for all trees displaying the minor parent edge
of hybrid node named `hybrid_name`, or displaying the edge number `edge_number`.
That is: log of P(hybrid minor parent | trait) if a single `trait` is requested,
or A[i]= log of P(hybrid minor parent | trait `i`)
if `trait` is a vector or range (e.g. `trait = 1:obj.nsites`).
These probabilities are conditional on the model parameters in `obj`.
**Precondition**: `_loglikcache` updated by [`discrete_corelikelihood!`](@ref)
# examples
```jldoctest
julia> net = readTopology("(((A:2.0,(B:1.0)#H1:0.1::0.9):1.5,(C:0.6,#H1:1.0::0.1):1.0):0.5,D:2.0);");
julia> m1 = BinaryTraitSubstitutionModel([0.1, 0.1], ["lo", "hi"]); # arbitrary rates
julia> using DataFrames
julia> dat = DataFrame(species=["C","A","B","D"], trait=["hi","lo","lo","hi"]);
julia> fit = fitdiscrete(net, m1, dat); # optimized rates: α=0.27 and β=0.35
julia> plhw = PhyloNetworks.posterior_loghybridweight(fit, "H1");
julia> round(exp(plhw), digits=5) # posterior probability of going through minor hybrid edge
0.08017
julia> hn = net.node[3]; getparentedgeminor(hn).gamma # prior probability
0.1
```
"""
function posterior_loghybridweight(obj::SSM, hybridname::String, trait = 1)
hn_index = findfirst(n -> n.name == hybridname, obj.net.node)
isnothing(hn_index) && error("node named $hybridname not found")
hn = obj.net.node[hn_index]
hn.hybrid || error("node named $hybridname is not a hybrid node")
me = getparentedgeminor(hn)
posterior_loghybridweight(obj, me.number, trait)
end
function posterior_loghybridweight(obj::SSM, edgenum::Integer, trait = 1)
tpp = posterior_logtreeweight(obj, trait) # size: (ntree,) or (ntree,ntraits)
hasedge = tree -> any(e.number == edgenum for e in tree.edge)
tokeep = map(hasedge, obj.displayedtree)
tppe = view(tpp, tokeep, :) # makes it a matrix
epp = dropdims(mapslices(logsumexp, tppe, dims=1); dims=2)
return (size(epp)==(1,) ? epp[1] : epp) # scalar or vector
end

"""
traitlabels2indices(data, model::SubstitutionModel)
Expand Down
1 change: 1 addition & 0 deletions test/test_traitLikDiscrete.jl
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ asr = ancestralStateReconstruction(fit1)
pltw = [-0.08356534477069566, -2.5236181051014333]
@test PhyloNetworks.posterior_logtreeweight(fit1) pltw atol=1e-5
@test PhyloNetworks.posterior_logtreeweight(fit1, 1:1) reshape(pltw, (2,1)) atol=1e-5
@test PhyloNetworks.posterior_loghybridweight(fit1, "H1") -2.5236227134322293

end # end of testset, fixed topology

Expand Down

2 comments on commit 524fcb8

@cecileane
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/83369

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.16.2 -m "<description of version>" 524fcb8456f5f4a1ec4ef1dbecc26dc0345decf7
git push origin v0.16.2

Please sign in to comment.