-
Notifications
You must be signed in to change notification settings - Fork 120
/
Copy path: mutualinfo.jl
41 lines (32 loc) · 1.14 KB
/
mutualinfo.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Mutual Information
#
# Compute the mutual information between two clusterings from their
# contingency matrix `A` (A[i, j] = number of points assigned to cluster i
# by the first clustering and to cluster j by the second).
#
# If `normed` is `true`, returns the normalized mutual information
# (symmetric uncertainty): 2*I(C,K) / (H(C) + H(K)); otherwise returns
# the raw mutual information I(C,K).
#
# NOTE(review): this derivation assumes `entropy(x)` computes
# -sum(xᵢ * log(xᵢ)) over the *raw counts* (no normalization), so that
# H(C) = entropy(rows)/N + log(N) — confirm against the `entropy` in scope.
function _mutualinfo(A::AbstractMatrix{<:Integer}, normed::Bool)
    N = sum(A)                 # total number of data points
    # Empty contingency table: define the mutual information as 0.
    # (was `N == 0.0`: comparing an integer sum to a float literal)
    N == 0 && return 0.0
    rows = sum(A, dims=2)      # cluster sizes of the first clustering (C)
    cols = sum(A, dims=1)      # cluster sizes of the second clustering (K)
    entA = entropy(A)          # joint count entropy
    entArows = entropy(rows)
    entAcols = entropy(cols)
    # H(C|K) = H(C,K) - H(K); the log(N) terms cancel in the difference.
    hck = (entA - entAcols)/N
    hc = entArows/N + log(N)   # H(C)
    hk = entAcols/N + log(N)   # H(K)
    mi = hc - hck              # I(C,K) = H(C) - H(C|K)
    return if normed
        # symmetric uncertainty, in [0, 1]
        2*mi/(hc+hk)
    else
        mi
    end
end
"""
mutualinfo(a, b; normed=true) -> Float64
Compute the *mutual information* between the two clusterings of the same
data points.
`a` and `b` can be either [`ClusteringResult`](@ref) instances or
assignments vectors (`AbstractVector{<:Integer}`).
If `normed` parameter is `true` the return value is the normalized mutual information (symmetric uncertainty),
see "Data Mining Practical Machine Tools and Techniques", Witten & Frank 2005.
# References
> Vinh, Epps, and Bailey, (2009). “Information theoretic measures for clusterings comparison”.
Proceedings of the 26th Annual International Conference on Machine Learning - ICML ‘09.
"""
mutualinfo(a, b; normed::Bool=true) = _mutualinfo(counts(a, b), normed)