From 6af109c85b92fe3504f961896f659ddaac0182c7 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sat, 12 Mar 2016 16:06:33 +0800 Subject: [PATCH 4/4] Kokkos::StaticCrsGraph: Add binary search to findRelOffset. See https://github.com/trilinos/Trilinos/issues/205 for discussion. This commit optimizes the function for the case where the input array to search is sorted, by using binary search in that case. This should make its performance comparable to that of Tpetra::CrsGraph::findLocalIndex or Epetra_CrsGraph::FindMyIndexLoc. Next step is to optimize for the case of short rows, by using linear search in that case. This should actually improve on both Epetra and Tpetra, and possibly address https://github.com/trilinos/Trilinos/issues/118. --- .../containers/src/Kokkos_StaticCrsGraph.hpp | 56 +++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp index d6bff23..c068692 100644 --- a/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp +++ b/packages/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp @@ -200,7 +200,7 @@ findRelOffset (const IndexViewType& indsToSearch, /* typename IndexViewType::const_value_type */ const typename std::decay::type indToFind, const OffsetType hint, - const bool /* isSorted */) + const bool isSorted) { // IndexViewType doesn't have to be a Kokkos::View; it just has to // implement operator[] like a 1-D array. @@ -215,13 +215,57 @@ findRelOffset (const IndexViewType& indsToSearch, if (hint < numEnt && indsToSearch[hint] == indToFind) { return hint; // hint was correct } - - // Start with simple linear search for now. - for (OffsetType k = 0; k < numEnt; ++k) { - if (indsToSearch[k] == indToFind) { - return k; + +#if 0 + // Even if the array is sorted, use linear search if the number of + // entries is small ("small" is a tuning parameter; feel free to + // tune for your architecture). 'constexpr' promises the compiler + // that it can bake this constant as a literal into the code. + constexpr OffsetType linearSearchThreshold = 16; + + if (! isSorted || numEnt < linearSearchThreshold) { +#else + if (! isSorted) { +#endif + for (OffsetType k = 0; k < numEnt; ++k) { + if (indsToSearch[k] == indToFind) { + return k; + } } } + else { // use binary search + OffsetType start = 0; + OffsetType end = numEnt; + // Compare epetra/src/Epetra_Util.cpp, Epetra_Util_binary_search. + // Unlike that function, I don't use end = numEnt-1, because I + // want this code to work also for unsigned OffsetType (signed is + // preferred, though). Thus, in my code, end is always "one past + // the last valid index." + while (end > start) { + // Invariants: 0 <= start < end, thus start + end > 0. + const OffsetType mid = (start + end - 1) / 2; + // Invariants: 0 <= start <= mid < end. + if (indsToSearch[mid] < indToFind) { + // Invariant: start < mid+1 (thus, recursion terminates), + // and for all k <= mid, indsToSearch[k] < indToFind. + start = mid + 1; // Invariant: 0 < mid < start <= end. + } + else { // indsToSearch[mid] >= indToFind + // Invariant: mid < end (thus, recursion terminates), + // and for all k <= mid, indsToSearch[k] >= indToFind. + end = mid; // Invariant: 0 <= start <= mid <= end. + } + } + // Invariant: 0 <= start == end. + + // Don't actually check the first entry if numEnt == 0. If numEnt + // > 0 and indsToSearch == NULL, that's the caller's problem. + if (numEnt > static_cast (0) && + indsToSearch[start] == indToFind) { + return start; + } + } + return numEnt; // "end of sequence" } -- 2.5.4 (Apple Git-61)