From abb2ea110a237e2164ca92b32d2bf9c44607e0f8 Mon Sep 17 00:00:00 2001
From: Dmitry Vinokurov
Date: Fri, 20 Mar 2020 19:47:31 +0600
Subject: [PATCH 1/4] Issue #83 add support of generic containers to VOI (#124)

* add utility classes for metaprogramming

* remove duplicate file

* enable distance examples

* add support of generic containers to Tree class

* add support of generic containers to entropy and VOI

* add tests for entropy

* Update modules/distance/k-random/VOI.cpp

Co-Authored-By: jura-gresko <57671564+jura-gresko@users.noreply.github.com>

* some code cleaning

Co-authored-by: jura-gresko <57671564+jura-gresko@users.noreply.github.com>
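As a usage sketch, the intended effect of the change (illustrative caller code only, not part of the diff; the sample values and the default k = 3, logbase = 2 arguments declared in VOI.hpp are assumptions):

    std::vector<std::vector<double>> v1 = { { 5.0 }, { 2.0 }, { 3.0 }, { 5.0 } };
    std::vector<std::vector<double>> v2 = { { 5.0 }, { 2.0 }, { 3.0 }, { 1.0 } };
    // default Chebyshev metric, same call as before the patch
    auto voi = metric::variationOfInformation(v1, v2);
    // explicit container type + metric, the combination this patch enables
    auto voi_m = metric::variationOfInformation<std::vector<std::vector<double>>,
        metric::Manhatten<double>>(v1, v2);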
---
 examples/CMakeLists.txt                   |   2 +-
 examples/distance_examples/CMakeLists.txt |   8 +-
 .../distance_examples/entropy_example.cpp |   2 +-
 modules/distance/k-random/VOI.cpp         | 154 ++++++++--------
 modules/distance/k-random/VOI.hpp         |  77 ++++----
 modules/space/tree.cpp                    |   3 +-
 modules/space/tree.hpp                    |   3 +-
 modules/utils/type_traits.hpp             | 157 +++++++++++++++++
 tests/CMakeLists.txt                      |   3 +-
 tests/distance_tests/CMakeLists.txt       |  31 ++++
 tests/distance_tests/voi_tests.cpp        | 164 ++++++++++++++++++
 11 files changed, 490 insertions(+), 114 deletions(-)
 create mode 100644 modules/utils/type_traits.hpp
 create mode 100644 tests/distance_tests/CMakeLists.txt
 create mode 100644 tests/distance_tests/voi_tests.cpp

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 899f89b8..57dfc518 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,6 +1,6 @@
 add_subdirectory(correlation_examples)
 add_subdirectory(energies_examples)
-#add_subdirectory(distance_examples)
+add_subdirectory(distance_examples)
 add_subdirectory(dnn_examples)
 add_subdirectory(ensemble_examples)
 #add_subdirectory(graph_pyhton_assets)

diff --git a/examples/distance_examples/CMakeLists.txt b/examples/distance_examples/CMakeLists.txt
index 142feee5..59115998 100644
--- a/examples/distance_examples/CMakeLists.txt
+++ b/examples/distance_examples/CMakeLists.txt
@@ -19,7 +19,7 @@ include_directories(${Boost_INCLUDE_DIRS})
 file(COPY "assets" DESTINATION ".")
 
 file(GLOB EXAMPLE_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
-
+find_package(JPEG)
 #Run through each source
 foreach(exampleSrc ${EXAMPLE_SRCS})
@@ -47,8 +47,10 @@ foreach(exampleSrc ${EXAMPLE_SRCS})
 endforeach(exampleSrc)
 
 if(UNIX)
-    target_link_libraries(earth_mover_distance_example /usr/local/lib/libjpeg.a)
-    target_link_libraries(earth_mover_distance_2_example /usr/local/lib/libjpeg.a)
+    target_link_libraries(earth_mover_distance_example JPEG::JPEG)
+    target_link_libraries(earth_mover_distance_2_example JPEG::JPEG)
+#    target_link_libraries(earth_mover_distance_example /usr/local/lib/libjpeg.a)
+#    target_link_libraries(earth_mover_distance_2_example /usr/local/lib/libjpeg.a)
 endif(UNIX)
 
 if(CMAKE_SYSTEM_NAME MATCHES Windows)

diff --git a/examples/distance_examples/entropy_example.cpp b/examples/distance_examples/entropy_example.cpp
index ff79ab8f..dac3d260 100644
--- a/examples/distance_examples/entropy_example.cpp
+++ b/examples/distance_examples/entropy_example.cpp
@@ -88,7 +88,7 @@ int main() {
     std::cout << "Variation of Information, normalized Variation of Information:" << std::endl;
     std::cout << "VOI = " << metric::variationOfInformation(v1, v2) << std::endl;
-    std::cout << "VOI (Manhatten) = " << metric::variationOfInformation<double, metric::Manhatten<double>>(v1, v2) << std::endl;
+    std::cout << "VOI (Manhatten) = " << metric::variationOfInformation<std::vector<std::vector<double>>, metric::Manhatten<double>>(v1, v2) << std::endl;
     std::cout << "VOI norm = " << metric::variationOfInformation_normalized(v1, v2) << std::endl;
 
     // functor

diff --git a/modules/distance/k-random/VOI.cpp b/modules/distance/k-random/VOI.cpp
index eb56eafc..d096a886 100644
--- a/modules/distance/k-random/VOI.cpp
+++ b/modules/distance/k-random/VOI.cpp
@@ -13,6 +13,7 @@ Copyright (c) 2019 Panda Team
 #include <random>
 #include <unordered_set>
 #include <vector>
+#include "../../utils/type_traits.hpp"
 #include <boost/functional/hash.hpp>
 #include <boost/math/special_functions/digamma.hpp>
@@ -27,9 +28,10 @@ namespace metric {
 namespace {
-    template <typename T>
-    void add_noise(std::vector<std::vector<T>>& data)
+    template <typename C>
+    void add_noise(C & data)
     {
+        using T = type_traits::underlaying_type_t<C>;
         std::random_device rd;
         std::mt19937 gen(rd());
         std::normal_distribution<T> dis(0, 1);
@@ -69,14 +71,13 @@ namespace {
         }
         std::cout << "]" << std::endl;
     }
-    template <typename T>
-    void combine(
-        const std::vector<std::vector<T>>& X, const std::vector<std::vector<T>>& Y, std::vector<std::vector<T>>& XY)
+    template <typename C1, typename C2, typename T = type_traits::underlaying_type_t<C1>>
+    std::vector<std::vector<T>> combine(const C1& X, const C2& Y)
     {
         std::size_t N = X.size();
         std::size_t dx = X[0].size();
         std::size_t dy = Y[0].size();
-        XY.resize(N);
+        std::vector<std::vector<T>> XY(N);
         for (std::size_t i = 0; i < N; i++) {
             XY[i].resize(dx + dy);
             std::size_t k = 0;
             for (std::size_t j = 0; j < dx; j++, k++) {
                 XY[i][k] = X[i][j];
             }
             for (std::size_t j = 0; j < dy; j++, k++) {
                 XY[i][k] = Y[i][j];
             }
         }
+        return XY;
     }
-    template <typename T>
-    std::vector<T> unique(const std::vector<T>& data)
+    template <typename Container, typename T = type_traits::index_value_type_t<Container>>
+    std::vector<T> unique(const Container& data)
     {
         std::unordered_set<std::size_t> hashes;
         std::vector<T> result;
@@ -104,11 +106,11 @@ namespace {
 template <typename Container, typename Metric, typename L>
 double entropy(
-    std::vector<Container> data, std::size_t k, L logbase, Metric metric)
+    const Container & data, std::size_t k, L logbase, const Metric & metric)
 {
-    using T = typename Container::value_type;
-
-    if (data.empty() || data[0].empty()) {
+    using T_underlaying = type_traits::underlaying_type_t<Container>;
+    using T = type_traits::index_value_type_t<Container>;
+    if (data.size() == 0 || data[0].size() == 0) {
         return 0;
     }
     if (data.size() < k + 1)
@@ -120,17 +122,17 @@ double entropy(
     double two = 2.0;  // this is in order to make types match the log template function
     double cb = d * log(logbase, two);
 
     if constexpr (!std::is_same<Metric, typename metric::Chebyshev<T_underlaying>>::value) {
         if constexpr (std::is_same<Metric, typename metric::Euclidian<T_underlaying>>::value) {
             p = 2;
         } else if constexpr (std::is_same<Metric, typename metric::P_norm<T_underlaying>>::value) {
             p = metric.p;
         }
         cb = cb + d * log(logbase, std::tgamma(1 + 1 / p)) - log(logbase, std::tgamma(1 + d / p));
     }
 
     //add_noise(data); // TODO test
     metric::Tree<T, Metric> tree(data, -1, metric);
     double entropyEstimate = boost::math::digamma(N) - boost::math::digamma(k) + cb + d * log(logbase, two);
     for (std::size_t i = 0; i < N; i++) {
         auto res = tree.knn(data[i], k + 1);
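The loop above accumulates the Kozachenko-Leonenko style estimate that entropy() implements; in LaTeX form (a restatement of the code in this hunk, not an addition to the patch; psi is the digamma function and eps_i the distance tree.knn returns for the (k+1)-th neighbour of data[i]):

    \hat{H}(X) = \psi(N) - \psi(k) + c_b + d \log_b 2 + \frac{d}{N} \sum_{i=1}^{N} \log_b \varepsilon_i

where d is the point dimension and c_b the unit-ball volume correction computed above from the metric's p.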
@@ -142,9 +147,12 @@ double entropy(
 // Kozachenko-Leonenko estimator based on https://hal.archives-ouvertes.fr/hal-00331300/document (Shannon diff. entropy,
 // q = 1)
-template <typename T, typename Metric = metric::Euclidian<T>, typename L = T>  // TODO check if L = T is correct
-typename std::enable_if<!std::is_integral<T>::value, T>::type entropy_kl(
-    std::vector<std::vector<T>> data, std::size_t k = 3, L logbase = 2, Metric metric = Metric())
+template <typename Container, typename Metric = metric::Euclidian<type_traits::underlaying_type_t<Container>>,
+    typename L = type_traits::underlaying_type_t<Container>>  // TODO check if L = T is correct
+std::enable_if_t<!std::is_integral_v<L>, L>
+entropy_kl(const Container & data, std::size_t k = 3, L logbase = 2, const Metric & metric = Metric())
 {
+    using T1 = type_traits::index_value_type_t<Container>;
+    using T = type_traits::underlaying_type_t<Container>;
     if (data.empty() || data[0].empty())
         return 0;
     if (data.size() < k + 1)
@@ -153,7 +158,7 @@ typename std::enable_if<!std::is_integral<T>::value, T>::type entropy_kl(
     if constexpr (!std::is_same<Metric, typename metric::Euclidian<T>>::value)
         throw std::logic_error("entropy function is now implemented only for Euclidean distance");
 
-    metric::Tree<std::vector<T>, Metric> tree(data, -1, metric);
+    metric::Tree<T1, Metric> tree(data, -1, metric);
     size_t N = data.size();
     size_t m = data[0].size();
@@ -172,8 +177,8 @@ typename std::enable_if<!std::is_integral<T>::value, T>::type entropy_kl(
     return sum;
 }
 
-template <typename T>
-std::pair<std::vector<double>, std::vector<std::vector<T>>> pluginEstimator(const std::vector<std::vector<T>>& Y)
+template <typename Container>
+std::pair<std::vector<double>, Container> pluginEstimator(const Container & Y)
 {
     std::vector<std::vector<T>> uniqueVal = unique(Y);
     std::vector<double> counts(uniqueVal.size());
@@ -189,10 +194,14 @@ std::pair<std::vector<double>, std::vector<std::vector<T>>> pluginEstimator(cons
     return std::make_pair(counts, uniqueVal);
 }
 
-template <typename T, typename Metric>
-typename std::enable_if<!std::is_integral<T>::value, T>::type mutualInformation(
-    const std::vector<std::vector<T>>& Xc, const std::vector<std::vector<T>>& Yc, int k, Metric metric, int version)
+template <typename Container, typename Metric>
+typename std::enable_if_t<!std::is_integral_v<type_traits::underlaying_type_t<Container>>,
+    type_traits::underlaying_type_t<Container>>
+mutualInformation(
+    const Container & Xc, const Container & Yc, int k, const Metric & metric, int version)
 {
+    using T = type_traits::underlaying_type_t<Container>;
+    using V = type_traits::index_value_type_t<Container>;
     T N = Xc.size();
 
     if (N < k + 1 || Yc.size() < k + 1)
@@ -202,15 +211,14 @@ typename std::enable_if<!std::is_integral<T>::value, T>::type mutualInformation(
     auto X = Xc;
     auto Y = Yc;
     add_noise(X);
     add_noise(Y);
-    std::vector<std::vector<T>> XY;
-    combine(X, Y, XY);
+    std::vector<std::vector<T>> XY = combine(X, Y);
     metric::Tree<std::vector<T>, Metric> tree(XY, -1, metric);
     auto entropyEstimate = boost::math::digamma(k) + boost::math::digamma(N);
     if (version == 2) {
         entropyEstimate -= 1 / static_cast<double>(k);
     }
 
-    metric::Tree<std::vector<T>, Metric> xTree(X, -1, metric);
+    metric::Tree<V, Metric> xTree(X, -1, metric);
 
     for (std::size_t i = 0; i < N; i++) {
         auto res = tree.knn(XY[i], k + 1);
@@ -239,83 +247,87 @@ typename std::enable_if<!std::is_integral<T>::value, T>::type mutualInformation(
     return entropyEstimate;
 }
 
-template <typename T>
-typename std::enable_if<std::is_integral<T>::value, T>::type mutualInformation(
-    const std::vector<std::vector<T>>& Xc, const std::vector<std::vector<T>>& Yc, T logbase)
+template <typename Container, typename T>
+std::enable_if_t<std::is_integral_v<T>, T>
+mutualInformation(const Container& Xc, const Container& Yc, T logbase)
 {
-    std::vector<std::vector<T>> XY;
-    combine(Xc, Yc, XY);
-    return entropy<T>(Xc, logbase)
-        + entropy<T>(Yc,
+    std::vector<std::vector<T>> XY = combine(Xc, Yc);
+    return entropy(Xc, logbase)
+        + entropy(Yc,
            logbase)  // entropy overload for integers is not implemented yet
-        - entropy<T>(XY, logbase);
+        - entropy(XY, logbase);
 }
 
-template <typename T, typename Metric>
-typename std::enable_if<!std::is_integral<T>::value, T>::type variationOfInformation(
-    const std::vector<std::vector<T>>& Xc, const std::vector<std::vector<T>>& Yc, int k, T logbase)
+template <typename C, typename Metric, typename T>
+typename std::enable_if_t<!std::is_integral_v<T>, T>
+variationOfInformation(const C& Xc, const C& Yc, int k, T logbase)
 {
-    return entropy<std::vector<T>, Metric>(Xc, k, logbase, Metric()) + entropy<std::vector<T>, Metric>(Yc, k, logbase, Metric())
-        - 2 * mutualInformation<T>(Xc, Yc, k);
+    return entropy(Xc, k, logbase, Metric()) + entropy(Yc, k, logbase, Metric())
+        - 2 * mutualInformation(Xc, Yc, k);
 }
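For reference, the sum of the two marginal entropies minus twice the mutual information in variationOfInformation above is the standard identity

    VOI(X, Y) = H(X) + H(Y) - 2 I(X; Y)

and the normalized variant that follows returns 1 - I(X; Y) / H(X, Y), using the joint entropy H(X, Y) = H(X) + H(Y) - I(X; Y).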
-template <typename T>
-typename std::enable_if<!std::is_integral<T>::value, T>::type variationOfInformation_normalized(
-    const std::vector<std::vector<T>>& Xc, const std::vector<std::vector<T>>& Yc, int k, T logbase)
+template <typename C, typename T>
+typename std::enable_if_t<!std::is_integral_v<T>, T>
+variationOfInformation_normalized(
+    const C& Xc, const C& Yc, int k, T logbase)
 {
     using Cheb = metric::Chebyshev<T>;
-    auto mi = mutualInformation<T>(Xc, Yc, k);
-    return 1 - (mi / (entropy<std::vector<T>, Cheb>(Xc, k, logbase, Cheb()) + entropy<std::vector<T>, Cheb>(Yc, k, logbase, Cheb()) - mi));
+    auto mi = mutualInformation(Xc, Yc, k);
+    return 1 - (mi / (entropy(Xc, k, logbase, Cheb()) + entropy(Yc, k, logbase, Cheb()) - mi));
+}
+
+template <typename V>
+template <typename C>
+typename std::enable_if_t<!std::is_integral_v<V>, V>  // only real values are accepted
+VOI<V>::operator()(const C& a, const C& b) const
+{
+    using Cheb = metric::Chebyshev<V>;
+    return entropy(a, k, logbase, Cheb()) + entropy(b, k, logbase, Cheb())
+        - 2 * mutualInformation(a, b, k);
+}
 
 template <typename V>
 template <typename C>
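A minimal sketch of the functor call path added above (hypothetical caller code; the default-constructed k and logbase members are assumed to be declared in VOI.hpp, which lies outside this excerpt):

    metric::VOI<double> voi;  // assumed defaults: k = 3, logbase = 2
    std::vector<std::vector<double>> a = { { 1.0 }, { 2.0 }, { 3.0 }, { 4.0 } };
    std::vector<std::vector<double>> b = { { 1.0 }, { 2.0 }, { 3.0 }, { 5.0 } };
    double d = voi(a, b);  // entropy(a) + entropy(b) - 2 * mutualInformation(a, b)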