From 4207e42ac32ab6ed9434edb7ebe62a408f892d44 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Thu, 3 Oct 2019 19:10:27 +0200 Subject: [PATCH 1/7] merge wmetrics.jl into metrics.jl --- src/Distances.jl | 3 - src/metrics.jl | 151 ++++++++++++++++++++++++++++++++++++++++- src/wmetrics.jl | 170 ----------------------------------------------- 3 files changed, 148 insertions(+), 176 deletions(-) diff --git a/src/Distances.jl b/src/Distances.jl index faef4f8..b447ee5 100644 --- a/src/Distances.jl +++ b/src/Distances.jl @@ -1,5 +1,3 @@ -__precompile__() - module Distances using LinearAlgebra @@ -102,7 +100,6 @@ export include("common.jl") include("generic.jl") include("metrics.jl") -include("wmetrics.jl") include("haversine.jl") include("mahalanobis.jl") include("bhattacharyya.jl") diff --git a/src/metrics.jl b/src/metrics.jl index 7439f35..601806e 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -5,6 +5,7 @@ # Metric types # ########################################################### +const RealAbstractArray{T <: Real} = AbstractArray{T} struct Euclidean <: Metric thresh::Float64 @@ -13,7 +14,12 @@ struct SqEuclidean <: SemiMetric thresh::Float64 end struct Chebyshev <: Metric end + struct Cityblock <: Metric end +struct WeightedCityblock{W <: RealAbstractArray} <: Metric + weights::W +end + struct TotalVariation <: Metric end struct Jaccard <: Metric end struct RogersTanimoto <: Metric end @@ -21,8 +27,15 @@ struct RogersTanimoto <: Metric end struct Minkowski{T <: Real} <: Metric p::T end +struct WeightedMinkowski{W <: RealAbstractArray,T <: Real} <: Metric + weights::W + p::T +end struct Hamming <: Metric end +struct WeightedHamming{W <: RealAbstractArray} <: Metric + weights::W +end struct CosineDist <: SemiMetric end # CorrDist is excluded from `UnionMetrics` @@ -104,9 +117,6 @@ struct PeriodicEuclidean{W <: AbstractArray{<: Real}} <: Metric periods::W end -const metrics = (Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence) -const UnionMetrics = Union{metrics...} - """ Euclidean([thresh]) @@ -137,6 +147,10 @@ julia> pairwise(Euclidean(1e-12), x, x) """ Euclidean() = Euclidean(0) +struct WeightedEuclidean{W <: RealAbstractArray} <: Metric + weights::W +end + """ SqEuclidean([thresh]) @@ -145,6 +159,10 @@ see [`Euclidean`](@ref). """ SqEuclidean() = SqEuclidean(0) +struct WeightedSqEuclidean{W <: RealAbstractArray} <: SemiMetric + weights::W +end + """ PeriodicEuclidean(L) @@ -165,6 +183,11 @@ julia> evaluate(PeriodicEuclidean(L), x, y) """ PeriodicEuclidean() = PeriodicEuclidean(Int[]) +const metrics = (Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence) +const weightedmetrics = (WeightedEuclidean,WeightedSqEuclidean,WeightedCityblock,WeightedMinkowski,WeightedHamming) +const UnionWeightedMetrics{W} = Union{map(M->M{W}, weightedmetrics)...} +const UnionMetrics = Union{metrics...} + ########################################################### # # Implementations @@ -175,6 +198,8 @@ const ArraySlice{T} = SubArray{T,1,Array{T,2},Tuple{Base.Slice{Base.OneTo{Int}}, @inline parameters(::UnionMetrics) = nothing +Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) + # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end # Specialized for Arrays and avoids a branch on the size @@ -256,16 +281,58 @@ end end return eval_end(d, s) end + +@inline function _evaluate(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) + @boundscheck if length(a) != length(b) + throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) + end + @boundscheck if length(a) != length(d.weights) + throw(DimensionMismatch("arrays have length $(length(a)) but weights have length $(length(d.weights)).")) + end + if length(a) == 0 + return zero(result_type(d, a, b)) + end + @inbounds begin + s = eval_start(d, a, b) + if size(a) == size(b) + @simd for I in eachindex(a, b, d.weights) + ai = a[I] + bi = b[I] + wi = d.weights[I] + s = eval_reduce(d, s, eval_op(d, ai, bi, wi)) + end + else + for (Ia, Ib, Iw) in zip(eachindex(a), eachindex(b), eachindex(d.weights)) + ai = a[Ia] + bi = b[Ib] + wi = d.weights[Iw] + s = eval_reduce(d, s, eval_op(d, ai, bi, wi)) + end + end + end + return eval_end(d, s) +end + result_type(dist::UnionMetrics, Ta::Type, Tb::Type) = typeof(evaluate(dist, oneunit(Ta), oneunit(Tb))) eval_start(d::UnionMetrics, a::AbstractArray, b::AbstractArray) = zero(result_type(d, a, b)) eval_end(d::UnionMetrics, s) = s +result_type(dist::UnionWeightedMetrics, Ta::Type, Tb::Type) = + typeof(evaluate(dist, oneunit(Ta), oneunit(Tb))) +@inline function eval_start(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) + zero(result_type(d, a, b)) +end +eval_end(d::UnionWeightedMetrics, s) = s for M in metrics @eval @inline (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) @eval @inline (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b)) end +for M in weightedmetrics + @eval (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) + @eval (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b, oneunit(eltype(dist)))) +end # SqEuclidean @inline eval_op(::SqEuclidean, ai, bi) = abs2(ai - bi) @@ -274,6 +341,11 @@ end sqeuclidean(a::AbstractArray, b::AbstractArray) = SqEuclidean()(a, b) sqeuclidean(a::Number, b::Number) = SqEuclidean()(a, b) +# Weighted Squared Euclidean +@inline eval_op(::WeightedSqEuclidean, ai, bi, wi) = abs2(ai - bi) * wi +@inline eval_reduce(::WeightedSqEuclidean, s1, s2) = s1 + s2 +wsqeuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedSqEuclidean(w)(a, b) + # Euclidean @inline eval_op(::Euclidean, ai, bi) = abs2(ai - bi) @inline eval_reduce(::Euclidean, s1, s2) = s1 + s2 @@ -281,6 +353,12 @@ eval_end(::Euclidean, s) = sqrt(s) euclidean(a::AbstractArray, b::AbstractArray) = Euclidean()(a, b) euclidean(a::Number, b::Number) = Euclidean()(a, b) +# Weighted Euclidean +@inline eval_op(::WeightedEuclidean, ai, bi, wi) = abs2(ai - bi) * wi +@inline eval_reduce(::WeightedEuclidean, s1, s2) = s1 + s2 +@inline eval_end(::WeightedEuclidean, s) = sqrt(s) +weuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedEuclidean(w)(a, b) + # PeriodicEuclidean Base.eltype(d::PeriodicEuclidean) = eltype(d.periods) @inline parameters(d::PeriodicEuclidean) = d.periods @@ -307,6 +385,11 @@ peuclidean(a::Number, b::Number, p::Real) = PeriodicEuclidean([p])(a, b) cityblock(a::AbstractArray, b::AbstractArray) = Cityblock()(a, b) cityblock(a::Number, b::Number) = Cityblock()(a, b) +# City Block +@inline eval_op(::WeightedCityblock, ai, bi, wi) = abs((ai - bi) * wi) +@inline eval_reduce(::WeightedCityblock, s1, s2) = s1 + s2 +wcityblock(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedCityblock(w)(a, b) + # Total variation @inline eval_op(::TotalVariation, ai, bi) = abs(ai - bi) @inline eval_reduce(::TotalVariation, s1, s2) = s1 + s2 @@ -329,12 +412,23 @@ eval_end(dist::Minkowski, s) = s.^(1 / dist.p) minkowski(a::AbstractArray, b::AbstractArray, p::Real) = Minkowski(p)(a, b) minkowski(a::Number, b::Number, p::Real) = Minkowski(p)(a, b) +# Weighted Minkowski +@inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi).^dist.p * wi +@inline eval_reduce(::WeightedMinkowski, s1, s2) = s1 + s2 +eval_end(dist::WeightedMinkowski, s) = s.^(1 / dist.p) +wminkowski(a::AbstractArray, b::AbstractArray, w::AbstractArray, p::Real) = WeightedMinkowski(w, p)(a, b) + # Hamming @inline eval_op(::Hamming, ai, bi) = ai != bi ? 1 : 0 @inline eval_reduce(::Hamming, s1, s2) = s1 + s2 hamming(a::AbstractArray, b::AbstractArray) = Hamming()(a, b) hamming(a::Number, b::Number) = Hamming()(a, b) +# WeightedHamming +@inline eval_op(::WeightedHamming, ai, bi, wi) = ai != bi ? wi : zero(eltype(wi)) +@inline eval_reduce(::WeightedHamming, s1, s2) = s1 + s2 +whamming(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedHamming(w)(a, b) + # Cosine dist @inline function eval_start(dist::CosineDist, a::AbstractArray, b::AbstractArray) T = Base.promote_typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))...) @@ -626,6 +720,42 @@ function _pairwise!(r::AbstractMatrix, dist::SqEuclidean, a::AbstractMatrix) r end +# Weighted SqEuclidean +function _pairwise!(r::AbstractMatrix, dist::WeightedSqEuclidean, + a::AbstractMatrix, b::AbstractMatrix) + w = dist.weights + m, na, nb = get_pairwise_dims(length(w), r, a, b) + + sa2 = wsumsq_percol(w, a) + sb2 = wsumsq_percol(w, b) + mul!(r, a', b .* w) + for j = 1:nb + @simd for i = 1:na + @inbounds r[i, j] = sa2[i] + sb2[j] - 2 * r[i, j] + end + end + r +end +function _pairwise!(r::AbstractMatrix, dist::WeightedSqEuclidean, + a::AbstractMatrix) + w = dist.weights + m, n = get_pairwise_dims(length(w), r, a) + + sa2 = wsumsq_percol(w, a) + mul!(r, a', a .* w) + + for j = 1:n + for i = 1:(j - 1) + @inbounds r[i, j] = r[j, i] + end + @inbounds r[j, j] = 0 + @simd for i = (j + 1):n + @inbounds r[i, j] = sa2[i] + sa2[j] - 2 * r[i, j] + end + end + r +end + # Euclidean function _pairwise!(r::AbstractMatrix, dist::Euclidean, a::AbstractMatrix, b::AbstractMatrix) @@ -681,6 +811,21 @@ function _pairwise!(r::AbstractMatrix, dist::Euclidean, a::AbstractMatrix) r end +# Weighted Euclidean +function colwise!(r::AbstractArray, dist::WeightedEuclidean, a::AbstractMatrix, b::AbstractMatrix) + sqrt!(colwise!(r, WeightedSqEuclidean(dist.weights), a, b)) +end +function colwise!(r::AbstractArray, dist::WeightedEuclidean, a::AbstractVector, b::AbstractMatrix) + sqrt!(colwise!(r, WeightedSqEuclidean(dist.weights), a, b)) +end +function _pairwise!(r::AbstractMatrix, dist::WeightedEuclidean, + a::AbstractMatrix, b::AbstractMatrix) + sqrt!(_pairwise!(r, WeightedSqEuclidean(dist.weights), a, b)) +end +function _pairwise!(r::AbstractMatrix, dist::WeightedEuclidean, a::AbstractMatrix) + sqrt!(_pairwise!(r, WeightedSqEuclidean(dist.weights), a)) +end + # CosineDist function _pairwise!(r::AbstractMatrix, dist::CosineDist, diff --git a/src/wmetrics.jl b/src/wmetrics.jl index 9e41bdc..e69de29 100644 --- a/src/wmetrics.jl +++ b/src/wmetrics.jl @@ -1,170 +0,0 @@ -# Weighted metrics - - -########################################################### -# -# Metric types -# -########################################################### -const RealAbstractArray{T <: Real} = AbstractArray{T} - - -struct WeightedEuclidean{W <: RealAbstractArray} <: Metric - weights::W -end - -struct WeightedSqEuclidean{W <: RealAbstractArray} <: SemiMetric - weights::W -end - -struct WeightedCityblock{W <: RealAbstractArray} <: Metric - weights::W -end - -struct WeightedMinkowski{W <: RealAbstractArray,T <: Real} <: Metric - weights::W - p::T -end - -struct WeightedHamming{W <: RealAbstractArray} <: Metric - weights::W -end - -const weightedmetrics = (WeightedEuclidean,WeightedSqEuclidean,WeightedCityblock,WeightedMinkowski,WeightedHamming) -const UnionWeightedMetrics{W} = Union{map(M->M{W}, weightedmetrics)...} -Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) -########################################################### -# -# Implementations -# -########################################################### - -result_type(dist::UnionWeightedMetrics, Ta::Type, Tb::Type) = - typeof(evaluate(dist, oneunit(Ta), oneunit(Tb))) - -@inline function eval_start(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) - zero(result_type(d, a, b)) -end -eval_end(d::UnionWeightedMetrics, s) = s - - - -@inline function _evaluate(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) - @boundscheck if length(a) != length(b) - throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) - end - @boundscheck if length(a) != length(d.weights) - throw(DimensionMismatch("arrays have length $(length(a)) but weights have length $(length(d.weights)).")) - end - if length(a) == 0 - return zero(result_type(d, a, b)) - end - @inbounds begin - s = eval_start(d, a, b) - if size(a) == size(b) - @simd for I in eachindex(a, b, d.weights) - ai = a[I] - bi = b[I] - wi = d.weights[I] - s = eval_reduce(d, s, eval_op(d, ai, bi, wi)) - end - else - for (Ia, Ib, Iw) in zip(eachindex(a), eachindex(b), eachindex(d.weights)) - ai = a[Ia] - bi = b[Ib] - wi = d.weights[Iw] - s = eval_reduce(d, s, eval_op(d, ai, bi, wi)) - end - end - end - return eval_end(d, s) -end - -for M in weightedmetrics - @eval (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) - @eval (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b, oneunit(eltype(dist)))) -end - -# Squared Euclidean -@inline eval_op(::WeightedSqEuclidean, ai, bi, wi) = abs2(ai - bi) * wi -@inline eval_reduce(::WeightedSqEuclidean, s1, s2) = s1 + s2 -wsqeuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedSqEuclidean(w)(a, b) - -# Weighted Euclidean -@inline eval_op(::WeightedEuclidean, ai, bi, wi) = abs2(ai - bi) * wi -@inline eval_reduce(::WeightedEuclidean, s1, s2) = s1 + s2 -@inline eval_end(::WeightedEuclidean, s) = sqrt(s) -weuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedEuclidean(w)(a, b) - -# City Block -@inline eval_op(::WeightedCityblock, ai, bi, wi) = abs((ai - bi) * wi) -@inline eval_reduce(::WeightedCityblock, s1, s2) = s1 + s2 -wcityblock(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedCityblock(w)(a, b) - -# Minkowski -@inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi).^dist.p * wi -@inline eval_reduce(::WeightedMinkowski, s1, s2) = s1 + s2 -eval_end(dist::WeightedMinkowski, s) = s.^(1 / dist.p) -wminkowski(a::AbstractArray, b::AbstractArray, w::AbstractArray, p::Real) = WeightedMinkowski(w, p)(a, b) - -# WeightedHamming -@inline eval_op(::WeightedHamming, ai, bi, wi) = ai != bi ? wi : zero(eltype(wi)) -@inline eval_reduce(::WeightedHamming, s1, s2) = s1 + s2 -whamming(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedHamming(w)(a, b) - -########################################################### -# -# Specialized -# -########################################################### - -# SqEuclidean -function _pairwise!(r::AbstractMatrix, dist::WeightedSqEuclidean, - a::AbstractMatrix, b::AbstractMatrix) - w = dist.weights - m, na, nb = get_pairwise_dims(length(w), r, a, b) - - sa2 = wsumsq_percol(w, a) - sb2 = wsumsq_percol(w, b) - mul!(r, a', b .* w) - for j = 1:nb - @simd for i = 1:na - @inbounds r[i, j] = sa2[i] + sb2[j] - 2 * r[i, j] - end - end - r -end -function _pairwise!(r::AbstractMatrix, dist::WeightedSqEuclidean, - a::AbstractMatrix) - w = dist.weights - m, n = get_pairwise_dims(length(w), r, a) - - sa2 = wsumsq_percol(w, a) - mul!(r, a', a .* w) - - for j = 1:n - for i = 1:(j - 1) - @inbounds r[i, j] = r[j, i] - end - @inbounds r[j, j] = 0 - @simd for i = (j + 1):n - @inbounds r[i, j] = sa2[i] + sa2[j] - 2 * r[i, j] - end - end - r -end - -# Euclidean -function colwise!(r::AbstractArray, dist::WeightedEuclidean, a::AbstractMatrix, b::AbstractMatrix) - sqrt!(colwise!(r, WeightedSqEuclidean(dist.weights), a, b)) -end -function colwise!(r::AbstractArray, dist::WeightedEuclidean, a::AbstractVector, b::AbstractMatrix) - sqrt!(colwise!(r, WeightedSqEuclidean(dist.weights), a, b)) -end -function _pairwise!(r::AbstractMatrix, dist::WeightedEuclidean, - a::AbstractMatrix, b::AbstractMatrix) - sqrt!(_pairwise!(r, WeightedSqEuclidean(dist.weights), a, b)) -end -function _pairwise!(r::AbstractMatrix, dist::WeightedEuclidean, a::AbstractMatrix) - sqrt!(_pairwise!(r, WeightedSqEuclidean(dist.weights), a)) -end From acfbe99272cf6bfd03d2dca507fd88496a1a99b9 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Thu, 3 Oct 2019 19:35:24 +0200 Subject: [PATCH 2/7] rearrange docstrings, no functional modifications --- src/metrics.jl | 141 +++++++++++++++++++++++++------------------------ 1 file changed, 71 insertions(+), 70 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index 601806e..5ae9ded 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -10,9 +10,79 @@ const RealAbstractArray{T <: Real} = AbstractArray{T} struct Euclidean <: Metric thresh::Float64 end + +""" + Euclidean([thresh]) + +Create a euclidean metric. + +When computing distances among large numbers of points, it can be much +more efficient to exploit the formula + + (x-y)^2 = x^2 - 2xy + y^2 + +However, this can introduce roundoff error. `thresh` (which defaults +to 0) specifies the relative square-distance tolerance on `2xy` +compared to `x^2 + y^2` to force recalculation of the distance using +the more precise direct (elementwise-subtraction) formula. + +# Example: +```julia +julia> x = reshape([0.1, 0.3, -0.1], 3, 1); + +julia> pairwise(Euclidean(), x, x) +1×1 Array{Float64,2}: + 7.45058e-9 + +julia> pairwise(Euclidean(1e-12), x, x) +1×1 Array{Float64,2}: + 0.0 +``` +""" +Euclidean() = Euclidean(0) + +struct WeightedEuclidean{W <: RealAbstractArray} <: Metric + weights::W +end + +""" + PeriodicEuclidean(L) + +Create a Euclidean metric on a rectangular periodic domain (i.e., a torus or +a cylinder). Periods per dimension are contained in the vector `L`: +```math +\\sqrt{\\sum_i(\\min\\mod(|x_i - y_i|, p), p - \\mod(|x_i - y_i|, p))^2}. +``` +For dimensions without periodicity put `Inf` in the respective component. + +# Example +```jldoctest +julia> x, y, L = [0.0, 0.0], [0.75, 0.0], [0.5, Inf]; + +julia> evaluate(PeriodicEuclidean(L), x, y) +0.25 +``` +""" +struct PeriodicEuclidean{W <: AbstractArray{<: Real}} <: Metric + periods::W +end + struct SqEuclidean <: SemiMetric thresh::Float64 end + +""" + SqEuclidean([thresh]) + +Create a squared-euclidean semi-metric. For the meaning of `thresh`, +see [`Euclidean`](@ref). +""" +SqEuclidean() = SqEuclidean(0) + +struct WeightedSqEuclidean{W <: RealAbstractArray} <: SemiMetric + weights::W +end + struct Chebyshev <: Metric end struct Cityblock <: Metric end @@ -113,76 +183,7 @@ struct MeanSqDeviation <: SemiMetric end struct RMSDeviation <: Metric end struct NormRMSDeviation <: PreMetric end -struct PeriodicEuclidean{W <: AbstractArray{<: Real}} <: Metric - periods::W -end - -""" - Euclidean([thresh]) - -Create a euclidean metric. - -When computing distances among large numbers of points, it can be much -more efficient to exploit the formula - - (x-y)^2 = x^2 - 2xy + y^2 - -However, this can introduce roundoff error. `thresh` (which defaults -to 0) specifies the relative square-distance tolerance on `2xy` -compared to `x^2 + y^2` to force recalculation of the distance using -the more precise direct (elementwise-subtraction) formula. - -# Example: -```julia -julia> x = reshape([0.1, 0.3, -0.1], 3, 1); - -julia> pairwise(Euclidean(), x, x) -1×1 Array{Float64,2}: - 7.45058e-9 - -julia> pairwise(Euclidean(1e-12), x, x) -1×1 Array{Float64,2}: - 0.0 -``` -""" -Euclidean() = Euclidean(0) - -struct WeightedEuclidean{W <: RealAbstractArray} <: Metric - weights::W -end - -""" - SqEuclidean([thresh]) - -Create a squared-euclidean semi-metric. For the meaning of `thresh`, -see [`Euclidean`](@ref). -""" -SqEuclidean() = SqEuclidean(0) - -struct WeightedSqEuclidean{W <: RealAbstractArray} <: SemiMetric - weights::W -end - -""" - PeriodicEuclidean(L) - -Create a Euclidean metric on a rectangular periodic domain (i.e., a torus or -a cylinder). Periods per dimension are contained in the vector `L`: -```math -\\sqrt{\\sum_i(\\min\\mod(|x_i - y_i|, p), p - \\mod(|x_i - y_i|, p))^2}. -``` -For dimensions without periodicity put `Inf` in the respective component. - -# Example -```jldoctest -julia> x, y, L = [0.0, 0.0], [0.75, 0.0], [0.5, Inf]; - -julia> evaluate(PeriodicEuclidean(L), x, y) -0.25 -``` -""" -PeriodicEuclidean() = PeriodicEuclidean(Int[]) - +# Union types const metrics = (Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence) const weightedmetrics = (WeightedEuclidean,WeightedSqEuclidean,WeightedCityblock,WeightedMinkowski,WeightedHamming) const UnionWeightedMetrics{W} = Union{map(M->M{W}, weightedmetrics)...} From 0154de0bc5b1b9526bf226f7443a6186063db113 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Fri, 4 Oct 2019 10:20:51 +0200 Subject: [PATCH 3/7] reduce unnecessary code after moving, add some scalar metrics, tests --- src/metrics.jl | 230 ++++++++++++++++++--------------------------- test/test_dists.jl | 25 +++-- 2 files changed, 112 insertions(+), 143 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index 5ae9ded..87fea25 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -186,8 +186,8 @@ struct NormRMSDeviation <: PreMetric end # Union types const metrics = (Euclidean,SqEuclidean,PeriodicEuclidean,Chebyshev,Cityblock,TotalVariation,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,ChiSqDist,KLDivergence,RenyiDivergence,BrayCurtis,JSDivergence,SpanNormDist,GenKLDivergence) const weightedmetrics = (WeightedEuclidean,WeightedSqEuclidean,WeightedCityblock,WeightedMinkowski,WeightedHamming) -const UnionWeightedMetrics{W} = Union{map(M->M{W}, weightedmetrics)...} -const UnionMetrics = Union{metrics...} +const UnionWeightedMetrics = Union{weightedmetrics...} +const UnionMetrics = Union{metrics...,weightedmetrics...} ########################################################### # @@ -195,205 +195,162 @@ const UnionMetrics = Union{metrics...} # ########################################################### -const ArraySlice{T} = SubArray{T,1,Array{T,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true} +parameters(::UnionMetrics) = nothing +parameters(d::PeriodicEuclidean) = d.periods +parameters(d::UnionWeightedMetrics) = d.weights -@inline parameters(::UnionMetrics) = nothing +result_type(dist::UnionMetrics, a::AbstractArray, b::AbstractArray) = + result_type(dist, a, b, parameters(dist)) +result_type(dist::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) = + typeof(_evaluate(dist, oneunit(eltype(a)), oneunit(eltype(b)))) +result_type(dist::UnionMetrics, a::AbstractArray, b::AbstractArray, p) = + typeof(_evaluate(dist, oneunit(eltype(a)), oneunit(eltype(b)), oneunit(eltype(p)))) -Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) +@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) + _evaluate(d, a, b, parameters(d)) +end +_evaluate(dist::UnionMetrics, a::Number, b::Number) = _evaluate(dist, a, b, parameters(dist)) # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end # Specialized for Arrays and avoids a branch on the size -@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::Union{Array, ArraySlice}, b::Union{Array, ArraySlice}) +@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end - p = parameters(d) - @boundscheck if p !== nothing - length(a) != length(p) && throw(DimensionMismatch("arrays have length $(length(a)) but parameters have length $(length(p)).")) - end if length(a) == 0 return zero(result_type(d, a, b)) end @inbounds begin s = eval_start(d, a, b) - if p === nothing + if IndexStyle(a, b) === IndexLinear() @simd for I in 1:length(a) ai = a[I] bi = b[I] s = eval_reduce(d, s, eval_op(d, ai, bi)) end else - @simd for I in 1:length(a) - aI = a[I] - bI = b[I] - pI = p[I] - s = eval_reduce(d, s, eval_op(d, aI, bI, pI)) - end - end - return eval_end(d, s) - end -end - -@inline function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) - @boundscheck if length(a) != length(b) - throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) - end - p = parameters(d) - @boundscheck if p !== nothing - length(a) != length(p) && throw(DimensionMismatch("arrays have length $(length(a)) but parameters have length $(length(p)).")) - end - if length(a) == 0 - return zero(result_type(d, a, b)) - end - @inbounds begin - s = eval_start(d, a, b) - if size(a) == size(b) - if p === nothing + if size(a) == size(b) @simd for I in eachindex(a, b) ai = a[I] bi = b[I] s = eval_reduce(d, s, eval_op(d, ai, bi)) end else - @simd for I in eachindex(a, b, p) - aI = a[I] - bI = b[I] - pI = p[I] - s = eval_reduce(d, s, eval_op(d, aI, bI, pI)) - end - end - else - if p === nothing for (Ia, Ib) in zip(eachindex(a), eachindex(b)) ai = a[Ia] bi = b[Ib] s = eval_reduce(d, s, eval_op(d, ai, bi)) end - else - for (Ia, Ib, Ip) in zip(eachindex(a), eachindex(b), eachindex(p)) - aI = a[Ia] - bI = b[Ib] - pI = p[Ip] - s = eval_reduce(d, s, eval_op(d, aI, bI, pI)) - end end end + return eval_end(d, s) end - return eval_end(d, s) end -@inline function _evaluate(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) +@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end - @boundscheck if length(a) != length(d.weights) - throw(DimensionMismatch("arrays have length $(length(a)) but weights have length $(length(d.weights)).")) + @boundscheck if length(a) != length(p) + throw(DimensionMismatch("arrays have length $(length(a)) but parameters have length $(length(p)).")) end if length(a) == 0 - return zero(result_type(d, a, b)) + return zero(result_type(d, a, b, p)) end @inbounds begin s = eval_start(d, a, b) - if size(a) == size(b) - @simd for I in eachindex(a, b, d.weights) + if IndexStyle(a, b, p) === IndexLinear() + @simd for I in 1:length(a) ai = a[I] bi = b[I] - wi = d.weights[I] - s = eval_reduce(d, s, eval_op(d, ai, bi, wi)) + pi = p[I] + s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) end else - for (Ia, Ib, Iw) in zip(eachindex(a), eachindex(b), eachindex(d.weights)) - ai = a[Ia] - bi = b[Ib] - wi = d.weights[Iw] - s = eval_reduce(d, s, eval_op(d, ai, bi, wi)) + if size(a) == size(b) + @simd for I in eachindex(a, b, p) + ai = a[I] + bi = b[I] + pi = p[I] + s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) + end + else + for (Ia, Ib, Ip) in zip(eachindex(a), eachindex(b), eachindex(p)) + ai = a[Ia] + bi = b[Ib] + pi = p[Ip] + s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) + end end end + return eval_end(d, s) end - return eval_end(d, s) end -result_type(dist::UnionMetrics, Ta::Type, Tb::Type) = - typeof(evaluate(dist, oneunit(Ta), oneunit(Tb))) -eval_start(d::UnionMetrics, a::AbstractArray, b::AbstractArray) = - zero(result_type(d, a, b)) -eval_end(d::UnionMetrics, s) = s -result_type(dist::UnionWeightedMetrics, Ta::Type, Tb::Type) = - typeof(evaluate(dist, oneunit(Ta), oneunit(Tb))) -@inline function eval_start(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) - zero(result_type(d, a, b)) +_evaluate(dist::UnionMetrics, a::Number, b::Number, ::Nothing) = eval_end(dist, eval_op(dist, a, b)) +function _evaluate(dist::UnionMetrics, a::Number, b::Number, p) + length(p) != 1 && throw(DimensionMismatch("inputs are scalars but parameters have length $(length(p)).")) + eval_end(dist, eval_op(dist, a, b, first(p))) end -eval_end(d::UnionWeightedMetrics, s) = s -for M in metrics - @eval @inline (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) - @eval @inline (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b)) -end -for M in weightedmetrics - @eval (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b) - @eval (dist::$M)(a::Number, b::Number) = eval_end(dist, eval_op(dist, a, b, oneunit(eltype(dist)))) -end - -# SqEuclidean -@inline eval_op(::SqEuclidean, ai, bi) = abs2(ai - bi) -@inline eval_reduce(::SqEuclidean, s1, s2) = s1 + s2 - -sqeuclidean(a::AbstractArray, b::AbstractArray) = SqEuclidean()(a, b) -sqeuclidean(a::Number, b::Number) = SqEuclidean()(a, b) +eval_start(d::UnionMetrics, a::AbstractArray, b::AbstractArray) = zero(result_type(d, a, b)) +eval_reduce(::UnionMetrics, s1, s2) = s1 + s2 +eval_end(d::UnionMetrics, s) = s -# Weighted Squared Euclidean -@inline eval_op(::WeightedSqEuclidean, ai, bi, wi) = abs2(ai - bi) * wi -@inline eval_reduce(::WeightedSqEuclidean, s1, s2) = s1 + s2 -wsqeuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedSqEuclidean(w)(a, b) +for M in (metrics..., weightedmetrics...) + @eval @inline (dist::$M)(a::AbstractArray, b::AbstractArray) = _evaluate(dist, a, b, parameters(dist)) + if M != SpanNormDist + @eval @inline (dist::$M)(a::Number, b::Number) = _evaluate(dist, a, b, parameters(dist)) + end +end # Euclidean @inline eval_op(::Euclidean, ai, bi) = abs2(ai - bi) -@inline eval_reduce(::Euclidean, s1, s2) = s1 + s2 eval_end(::Euclidean, s) = sqrt(s) euclidean(a::AbstractArray, b::AbstractArray) = Euclidean()(a, b) euclidean(a::Number, b::Number) = Euclidean()(a, b) # Weighted Euclidean @inline eval_op(::WeightedEuclidean, ai, bi, wi) = abs2(ai - bi) * wi -@inline eval_reduce(::WeightedEuclidean, s1, s2) = s1 + s2 -@inline eval_end(::WeightedEuclidean, s) = sqrt(s) +eval_end(::WeightedEuclidean, s) = sqrt(s) weuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedEuclidean(w)(a, b) +weuclidean(a::Number, b::Number, w::Real) = WeightedEuclidean([w])(a, b) # PeriodicEuclidean -Base.eltype(d::PeriodicEuclidean) = eltype(d.periods) -@inline parameters(d::PeriodicEuclidean) = d.periods @inline function eval_op(d::PeriodicEuclidean, ai, bi, p) s1 = abs(ai - bi) s2 = mod(s1, p) s3 = min(s2, p - s2) abs2(s3) end -@inline function eval_op(d::PeriodicEuclidean, ai, bi) - periods = d.periods - p = isempty(periods) ? oneunit(eltype(periods)) : first(periods) - eval_op(d, ai, bi, p) -end -@inline eval_reduce(::PeriodicEuclidean, s1, s2) = s1 + s2 -@inline eval_end(::PeriodicEuclidean, s) = sqrt(s) +eval_end(::PeriodicEuclidean, s) = sqrt(s) peuclidean(a::AbstractArray, b::AbstractArray, p::AbstractArray{<: Real}) = PeriodicEuclidean(p)(a, b) peuclidean(a::Number, b::Number, p::Real) = PeriodicEuclidean([p])(a, b) +# SqEuclidean +@inline eval_op(::SqEuclidean, ai, bi) = abs2(ai - bi) +sqeuclidean(a::AbstractArray, b::AbstractArray) = SqEuclidean()(a, b) +sqeuclidean(a::Number, b::Number) = SqEuclidean()(a, b) + +# Weighted Squared Euclidean +@inline eval_op(::WeightedSqEuclidean, ai, bi, wi) = abs2(ai - bi) * wi +wsqeuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedSqEuclidean(w)(a, b) +wsqeuclidean(a::Number, b::Number, w::Real) = WeightedSqEuclidean([w])(a, b) + # Cityblock @inline eval_op(::Cityblock, ai, bi) = abs(ai - bi) -@inline eval_reduce(::Cityblock, s1, s2) = s1 + s2 cityblock(a::AbstractArray, b::AbstractArray) = Cityblock()(a, b) cityblock(a::Number, b::Number) = Cityblock()(a, b) -# City Block +# Weighted City Block @inline eval_op(::WeightedCityblock, ai, bi, wi) = abs((ai - bi) * wi) -@inline eval_reduce(::WeightedCityblock, s1, s2) = s1 + s2 wcityblock(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedCityblock(w)(a, b) +wcityblock(a::Number, b::Number, w::Real) = WeightedCityblock([w])(a, b) # Total variation @inline eval_op(::TotalVariation, ai, bi) = abs(ai - bi) -@inline eval_reduce(::TotalVariation, s1, s2) = s1 + s2 eval_end(::TotalVariation, s) = s / 2 totalvariation(a::AbstractArray, b::AbstractArray) = TotalVariation()(a, b) totalvariation(a::Number, b::Number) = TotalVariation()(a, b) @@ -407,32 +364,30 @@ chebyshev(a::AbstractArray, b::AbstractArray) = Chebyshev()(a, b) chebyshev(a::Number, b::Number) = Chebyshev()(a, b) # Minkowski -@inline eval_op(dist::Minkowski, ai, bi) = abs(ai - bi).^dist.p -@inline eval_reduce(::Minkowski, s1, s2) = s1 + s2 -eval_end(dist::Minkowski, s) = s.^(1 / dist.p) +@inline eval_op(dist::Minkowski, ai, bi) = abs(ai - bi)^dist.p +@inline eval_end(dist::Minkowski, s) = s^(1 / dist.p) minkowski(a::AbstractArray, b::AbstractArray, p::Real) = Minkowski(p)(a, b) minkowski(a::Number, b::Number, p::Real) = Minkowski(p)(a, b) # Weighted Minkowski -@inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi).^dist.p * wi -@inline eval_reduce(::WeightedMinkowski, s1, s2) = s1 + s2 -eval_end(dist::WeightedMinkowski, s) = s.^(1 / dist.p) +@inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi)^dist.p * wi +@inline eval_end(dist::WeightedMinkowski, s) = s^(1 / dist.p) wminkowski(a::AbstractArray, b::AbstractArray, w::AbstractArray, p::Real) = WeightedMinkowski(w, p)(a, b) +wminkowski(a::Number, b::Number, w::Real, p::Real) = WeightedMinkowski([w], p)(a, b) # Hamming @inline eval_op(::Hamming, ai, bi) = ai != bi ? 1 : 0 -@inline eval_reduce(::Hamming, s1, s2) = s1 + s2 hamming(a::AbstractArray, b::AbstractArray) = Hamming()(a, b) hamming(a::Number, b::Number) = Hamming()(a, b) # WeightedHamming @inline eval_op(::WeightedHamming, ai, bi, wi) = ai != bi ? wi : zero(eltype(wi)) -@inline eval_reduce(::WeightedHamming, s1, s2) = s1 + s2 whamming(a::AbstractArray, b::AbstractArray, w::AbstractArray) = WeightedHamming(w)(a, b) +whamming(a::Number, b::Number, w::Real) = WeightedHamming([w])(a, b) # Cosine dist @inline function eval_start(dist::CosineDist, a::AbstractArray, b::AbstractArray) - T = Base.promote_typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))...) + T = result_type(dist, a, b) zero(T), zero(T), zero(T) end @inline eval_op(::CosineDist, ai, bi) = ai * bi, ai * ai, bi * bi @@ -443,31 +398,31 @@ end end function eval_end(::CosineDist, s) ab, a2, b2 = s - max(1 - ab / (sqrt(a2) * sqrt(b2)), zero(eltype(ab))) + max(1 - ab / (sqrt(a2) * sqrt(b2)), 0) end cosine_dist(a::AbstractArray, b::AbstractArray) = CosineDist()(a, b) +cosine_dist(a::Number, b::Number) = CosineDist()(a, b) -# Correlation Dist +# CorrDist _centralize(x::AbstractArray) = x .- mean(x) (dist::CorrDist)(a::AbstractArray, b::AbstractArray) = CosineDist()(_centralize(a), _centralize(b)) +(dist::CorrDist)(a::Number, b::Number) = CosineDist()(zero(mean(a)), zero(mean(b))) corr_dist(a::AbstractArray, b::AbstractArray) = CorrDist()(a, b) -result_type(::CorrDist, Ta::Type, Tb::Type) = result_type(CosineDist(), Ta, Tb) +corr_dist(a::Number, b::Number) = CorrDist()(a, b) +# result_type(::CorrDist, Ta::Type, Tb::Type) = result_type(CosineDist(), Ta, Tb) # ChiSqDist @inline eval_op(::ChiSqDist, ai, bi) = (d = abs2(ai - bi) / (ai + bi); ifelse(ai != bi, d, zero(d))) -@inline eval_reduce(::ChiSqDist, s1, s2) = s1 + s2 chisq_dist(a::AbstractArray, b::AbstractArray) = ChiSqDist()(a, b) # KLDivergence @inline eval_op(dist::KLDivergence, ai, bi) = ai > 0 ? ai * log(ai / bi) : zero(eval_op(dist, oneunit(ai), bi)) -@inline eval_reduce(::KLDivergence, s1, s2) = s1 + s2 kl_divergence(a::AbstractArray, b::AbstractArray) = KLDivergence()(a, b) # GenKLDivergence @inline eval_op(dist::GenKLDivergence, ai, bi) = ai > 0 ? ai * log(ai / bi) - ai + bi : oftype(eval_op(dist, oneunit(ai), bi), bi) -@inline eval_reduce(::GenKLDivergence, s1, s2) = s1 + s2 gkl_divergence(a::AbstractArray, b::AbstractArray) = GenKLDivergence()(a, b) # RenyiDivergence @@ -524,6 +479,7 @@ let docstring = Base.Docs.getdoc(RenyiDivergence) end # JSDivergence + @inline function eval_op(::JSDivergence, ai::T, bi::T) where {T} u = (ai + bi) / 2 ta = ai > 0 ? ai * log(ai) / 2 : zero(log(one(T))) @@ -531,14 +487,16 @@ end tu = u > 0 ? u * log(u) : zero(log(one(T))) ta + tb - tu end -@inline eval_reduce(::JSDivergence, s1, s2) = s1 + s2 js_divergence(a::AbstractArray, b::AbstractArray) = JSDivergence()(a, b) # SpanNormDist + +result_type(dist::SpanNormDist, a::AbstractArray, b::AbstractArray) = + typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))) @inline Base.@propagate_inbounds function eval_start(::SpanNormDist, a::AbstractArray, b::AbstractArray) a[1] - b[1], a[1] - b[1] end -@inline eval_op(::SpanNormDist, ai, bi) = ai - bi +eval_op(::SpanNormDist, ai, bi) = ai - bi @inline function eval_reduce(::SpanNormDist, s1, s2) min_d, max_d = s1 if s2 > max_d @@ -550,15 +508,15 @@ end end eval_end(::SpanNormDist, s) = s[2] - s[1] +(::SpanNormDist)(a::Number, b::Number) = zero(promote_type(typeof(a), typeof(b))) spannorm_dist(a::AbstractArray, b::AbstractArray) = SpanNormDist()(a, b) -result_type(dist::SpanNormDist, Ta::Type, Tb::Type) = - typeof(eval_op(dist, oneunit(Ta), oneunit(Tb))) +spannorm_dist(a::Number, b::Number) = SpanNormDist()(a, b) # Jaccard @inline eval_start(::Jaccard, a::AbstractArray{Bool}, b::AbstractArray{Bool}) = 0, 0 @inline function eval_start(dist::Jaccard, a::AbstractArray, b::AbstractArray) - T = Base.promote_typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))...) + T = result_type(dist, a, b) zero(T), zero(T) end @inline function eval_op(::Jaccard, s1, s2) @@ -576,12 +534,12 @@ end return v end jaccard(a::AbstractArray, b::AbstractArray) = Jaccard()(a, b) +jaccard(a::Number, b::Number) = Jaccard()(a, b) # BrayCurtis -@inline eval_start(::BrayCurtis, a::AbstractArray{Bool}, b::AbstractArray{Bool}) = 0, 0 @inline function eval_start(dist::BrayCurtis, a::AbstractArray, b::AbstractArray) - T = Base.promote_typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))...) + T = result_type(dist, a, b) zero(T), zero(T) end @inline function eval_op(::BrayCurtis, s1, s2) @@ -599,7 +557,7 @@ end return v end braycurtis(a::AbstractArray, b::AbstractArray) = BrayCurtis()(a, b) - +braycurtis(a::Number, b::Number) = BrayCurtis()(a, b) # Tanimoto diff --git a/test/test_dists.jl b/test/test_dists.jl index 06d2bc1..d9e2ea8 100644 --- a/test/test_dists.jl +++ b/test/test_dists.jl @@ -122,22 +122,33 @@ end @testset "individual metrics" begin a = 1 b = 2 - @test sqeuclidean(a, b) == 1.0 - - @test euclidean(a, b) == 1.0 - @test cityblock(a, b) == 1.0 - @test totalvariation(a, b) == 0.5 + @test sqeuclidean(a, b) === 1 + @test euclidean(a, b) === 1.0 + @test jaccard(a, b) === 0.5 + @test cityblock(a, b) === 1 + @test totalvariation(a, b) === 0.5 @test chebyshev(a, b) == 1.0 + @test braycurtis(a, b) === 1/3 @test minkowski(a, b, 2) == 1.0 @test hamming(a, b) == 1 - @test peuclidean(a, b, 0.5) == 0 - @test peuclidean(a, b, 2) == 1.0 + @test peuclidean(a, b, 0.5) === 0.0 + @test peuclidean(a, b, 2) === 1.0 + @test cosine_dist(a, b) === 0.0 + @test isnan(corr_dist(a, b)) + @test spannorm_dist(a, b) === 0 bt = [true, false, true] bf = [false, true, true] @test rogerstanimoto(bt, bf) == 4.0 / 5.0 @test braycurtis(bt, bf) == 0.5 + w = 2 + @test wsqeuclidean(a, b, w) === 2 + @test weuclidean(a, b, w) === sqrt(2) + @test wcityblock(a, b, w) === 2 + @test wminkowski(a, b, w, 2) === sqrt(2) + @test whamming(a, b, w) === 2 + for T in (Float64, F64) for (_x, _y) in (([4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0]), From 69a5c167f9fd2b0a08ad1fec25336cef0733dd3b Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Fri, 4 Oct 2019 10:22:23 +0200 Subject: [PATCH 4/7] remove wmetrics.jl --- src/wmetrics.jl | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/wmetrics.jl diff --git a/src/wmetrics.jl b/src/wmetrics.jl deleted file mode 100644 index e69de29..0000000 From 407f9644590426018eaf379777ef45613613ad57 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Fri, 4 Oct 2019 22:03:45 +0200 Subject: [PATCH 5/7] replace RealAbstractArray --- src/metrics.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index 87fea25..fd8da3d 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -5,7 +5,6 @@ # Metric types # ########################################################### -const RealAbstractArray{T <: Real} = AbstractArray{T} struct Euclidean <: Metric thresh::Float64 @@ -41,7 +40,7 @@ julia> pairwise(Euclidean(1e-12), x, x) """ Euclidean() = Euclidean(0) -struct WeightedEuclidean{W <: RealAbstractArray} <: Metric +struct WeightedEuclidean{W <: AbstractArray{<:Real}} <: Metric weights::W end @@ -79,14 +78,14 @@ see [`Euclidean`](@ref). """ SqEuclidean() = SqEuclidean(0) -struct WeightedSqEuclidean{W <: RealAbstractArray} <: SemiMetric +struct WeightedSqEuclidean{W <: AbstractArray{<:Real}} <: SemiMetric weights::W end struct Chebyshev <: Metric end struct Cityblock <: Metric end -struct WeightedCityblock{W <: RealAbstractArray} <: Metric +struct WeightedCityblock{W <: AbstractArray{<:Real}} <: Metric weights::W end @@ -97,13 +96,13 @@ struct RogersTanimoto <: Metric end struct Minkowski{T <: Real} <: Metric p::T end -struct WeightedMinkowski{W <: RealAbstractArray,T <: Real} <: Metric +struct WeightedMinkowski{W <: AbstractArray{<:Real},T <: Real} <: Metric weights::W p::T end struct Hamming <: Metric end -struct WeightedHamming{W <: RealAbstractArray} <: Metric +struct WeightedHamming{W <: AbstractArray{<:Real}} <: Metric weights::W end From dbc7aebeec32519a7eac96d58763f8a3859596d2 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Mon, 14 Oct 2019 13:10:16 +0200 Subject: [PATCH 6/7] edits as per review --- src/metrics.jl | 53 ++++++++++++++++---------------------------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index fd8da3d..7dd94cd 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -205,15 +205,14 @@ result_type(dist::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) = result_type(dist::UnionMetrics, a::AbstractArray, b::AbstractArray, p) = typeof(_evaluate(dist, oneunit(eltype(a)), oneunit(eltype(b)), oneunit(eltype(p)))) -@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) +Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) _evaluate(d, a, b, parameters(d)) end _evaluate(dist::UnionMetrics, a::Number, b::Number) = _evaluate(dist, a, b, parameters(dist)) # breaks the implementation into eval_start, eval_op, eval_reduce and eval_end -# Specialized for Arrays and avoids a branch on the size -@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) +Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, ::Nothing) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -222,32 +221,24 @@ _evaluate(dist::UnionMetrics, a::Number, b::Number) = _evaluate(dist, a, b, para end @inbounds begin s = eval_start(d, a, b) - if IndexStyle(a, b) === IndexLinear() + if IndexStyle(a, b) === IndexLinear() || size(a) == size(b) @simd for I in 1:length(a) ai = a[I] bi = b[I] s = eval_reduce(d, s, eval_op(d, ai, bi)) end else - if size(a) == size(b) - @simd for I in eachindex(a, b) - ai = a[I] - bi = b[I] - s = eval_reduce(d, s, eval_op(d, ai, bi)) - end - else - for (Ia, Ib) in zip(eachindex(a), eachindex(b)) - ai = a[Ia] - bi = b[Ib] - s = eval_reduce(d, s, eval_op(d, ai, bi)) - end + for (Ia, Ib) in zip(eachindex(a), eachindex(b)) + ai = a[Ia] + bi = b[Ib] + s = eval_reduce(d, s, eval_op(d, ai, bi)) end end return eval_end(d, s) end end -@inline Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) +Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray, p::AbstractArray) @boundscheck if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -259,7 +250,7 @@ end end @inbounds begin s = eval_start(d, a, b) - if IndexStyle(a, b, p) === IndexLinear() + if IndexStyle(a, b, p) === IndexLinear() || size(a) == size(b) @simd for I in 1:length(a) ai = a[I] bi = b[I] @@ -267,20 +258,11 @@ end s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) end else - if size(a) == size(b) - @simd for I in eachindex(a, b, p) - ai = a[I] - bi = b[I] - pi = p[I] - s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) - end - else - for (Ia, Ib, Ip) in zip(eachindex(a), eachindex(b), eachindex(p)) - ai = a[Ia] - bi = b[Ib] - pi = p[Ip] - s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) - end + for (Ia, Ib, Ip) in zip(eachindex(a), eachindex(b), eachindex(p)) + ai = a[Ia] + bi = b[Ib] + pi = p[Ip] + s = eval_reduce(d, s, eval_op(d, ai, bi, pi)) end end return eval_end(d, s) @@ -358,7 +340,7 @@ totalvariation(a::Number, b::Number) = TotalVariation()(a, b) @inline eval_op(::Chebyshev, ai, bi) = abs(ai - bi) @inline eval_reduce(::Chebyshev, s1, s2) = max(s1, s2) # if only NaN, will output NaN -@inline Base.@propagate_inbounds eval_start(::Chebyshev, a::AbstractArray, b::AbstractArray) = abs(a[1] - b[1]) +Base.@propagate_inbounds eval_start(::Chebyshev, a::AbstractArray, b::AbstractArray) = abs(a[1] - b[1]) chebyshev(a::AbstractArray, b::AbstractArray) = Chebyshev()(a, b) chebyshev(a::Number, b::Number) = Chebyshev()(a, b) @@ -408,7 +390,6 @@ _centralize(x::AbstractArray) = x .- mean(x) (dist::CorrDist)(a::Number, b::Number) = CosineDist()(zero(mean(a)), zero(mean(b))) corr_dist(a::AbstractArray, b::AbstractArray) = CorrDist()(a, b) corr_dist(a::Number, b::Number) = CorrDist()(a, b) -# result_type(::CorrDist, Ta::Type, Tb::Type) = result_type(CosineDist(), Ta, Tb) # ChiSqDist @inline eval_op(::ChiSqDist, ai, bi) = (d = abs2(ai - bi) / (ai + bi); ifelse(ai != bi, d, zero(d))) @@ -425,7 +406,7 @@ kl_divergence(a::AbstractArray, b::AbstractArray) = KLDivergence()(a, b) gkl_divergence(a::AbstractArray, b::AbstractArray) = GenKLDivergence()(a, b) # RenyiDivergence -@inline Base.@propagate_inbounds function eval_start(::RenyiDivergence, a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Real} +Base.@propagate_inbounds function eval_start(::RenyiDivergence, a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Real} zero(T), zero(T), T(sum(a)), T(sum(b)) end @@ -492,7 +473,7 @@ js_divergence(a::AbstractArray, b::AbstractArray) = JSDivergence()(a, b) result_type(dist::SpanNormDist, a::AbstractArray, b::AbstractArray) = typeof(eval_op(dist, oneunit(eltype(a)), oneunit(eltype(b)))) -@inline Base.@propagate_inbounds function eval_start(::SpanNormDist, a::AbstractArray, b::AbstractArray) +Base.@propagate_inbounds function eval_start(::SpanNormDist, a::AbstractArray, b::AbstractArray) a[1] - b[1], a[1] - b[1] end eval_op(::SpanNormDist, ai, bi) = ai - bi From 4d76aa491361025344043a7288dee0944b039a41 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Mon, 14 Oct 2019 14:08:15 +0200 Subject: [PATCH 7/7] replace 1:length by eachindex --- src/metrics.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/metrics.jl b/src/metrics.jl index 7dd94cd..67b43ff 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -222,7 +222,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b @inbounds begin s = eval_start(d, a, b) if IndexStyle(a, b) === IndexLinear() || size(a) == size(b) - @simd for I in 1:length(a) + @simd for I in eachindex(a, b) ai = a[I] bi = b[I] s = eval_reduce(d, s, eval_op(d, ai, bi)) @@ -251,7 +251,7 @@ Base.@propagate_inbounds function _evaluate(d::UnionMetrics, a::AbstractArray, b @inbounds begin s = eval_start(d, a, b) if IndexStyle(a, b, p) === IndexLinear() || size(a) == size(b) - @simd for I in 1:length(a) + @simd for I in eachindex(a, b) ai = a[I] bi = b[I] pi = p[I]