From c39c945b7a81c5c1130db566985c3b919a0b9560 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sat, 12 Mar 2016 17:27:15 +0800 Subject: [PATCH] TMP Tpetra::CrsGraph::getLocalDiagOffsets: Partway to #212 @trilinos/tpetra I've gotten you partway to #212. The rest is really just writing a device functor. --- packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp | 148 +++++++++++++++-------- 1 file changed, 100 insertions(+), 48 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 653fa85..f1c331c 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5831,68 +5831,120 @@ namespace Tpetra { bool allOffsetsCorrect = true; bool noOtherWeirdness = true; std::vector > wrongOffsets; - auto localGraph = this->getLocalGraph (); #endif // HAVE_TPETRA_DEBUG + // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just + // the subset of Map functionality that we need below. + auto lclRowMap = rowMap.getLocalMap (); + auto lclColMap = colMap.getLocalMap (); + // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this // setup, at least on the host. For CUDA, we have to use LocalMap // (that comes from each of the two Maps). - for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) { - const GO gblRowInd = rowMap.getGlobalElement (lclRowInd); - const GO gblColInd = gblRowInd; - const LO lclColInd = colMap.getLocalElement (gblColInd); + if (isFillComplete ()) { + // mfh 12 Mar 2016: The fillComplete branch may be parallelized + // on the (CUDA) device. I've gotten you most of the way there: + // all functions called are marked as Kokkos device functions, + // and all Views are device Views. + + auto lclGraph = this->getLocalGraph (); + // Get the MemoryUnmanaged version of the graph. This avoids + // memory management overhead in the parallel kernel below. + // + // FIXME (mfh 12 Mar 2016) There's currently no way to make a + // MemoryUnmanaged Kokkos::StaticCrsGraph. Thus, we have to do + // this separately for its offsets and column indices. + Kokkos::View ptr = lclGraph.row_map; + Kokkos::View ind = lclGraph.entries; + const size_t INV = Tpetra::Details::OrdinalTraits::invalid (); + + for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) { + const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd); + const GO gblColInd = gblRowInd; + const LO lclColInd = lclColMap.getLocalElement (gblColInd); + + if (lclColInd == INV) { + offsets[lclRowInd] = INV; + } + else { + // Could be empty, but that's OK. + const LO numEnt = ptr[lclRowInd+1] - ptr[lclRowInd]; + // std::pair doesn't have its methods marked as device functions, + // so we have to use Kokkos::pair. + auto lclColInds = + Kokkos::subview (ind, Kokkos::make_pair (ptr[lclRowInd], + ptr[lclRowInd+1])); + using Kokkos::Details::findRelOffset; + const LO offset = + findRelOffset (lclColInds, numEnt, + lclColInd, 0, + this->isSorted ()); + offsets[lclRowInd] = (offset == numEnt) ? INV : + static_cast (offset); + } + } + } + else { + for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) { + const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd); + const GO gblColInd = gblRowInd; + const LO lclColInd = lclColMap.getLocalElement (gblColInd); - if (lclColInd == Teuchos::OrdinalTraits::invalid ()) { + if (lclColInd == Tpetra::Details::OrdinalTraits::invalid ()) { #ifdef HAVE_TPETRA_DEBUG - allRowMapDiagEntriesInColMap = false; + allRowMapDiagEntriesInColMap = false; #endif // HAVE_TPETRA_DEBUG - offsets[lclRowInd] = Teuchos::OrdinalTraits::invalid (); - } - else { - const RowInfo rowInfo = this->getRowInfo (lclRowInd); - if (static_cast (rowInfo.localRow) == lclRowInd && - rowInfo.numEntries > 0) { - const size_t offset = this->findLocalIndex (rowInfo, lclColInd); - offsets(lclRowInd) = offset; + offsets[lclRowInd] = Tpetra::Details::OrdinalTraits::invalid (); + } + else { + const RowInfo rowInfo = this->getRowInfo (lclRowInd); + if (static_cast (rowInfo.localRow) == lclRowInd && + rowInfo.numEntries > 0) { + const size_t offset = this->findLocalIndex (rowInfo, lclColInd); + offsets(lclRowInd) = offset; #ifdef HAVE_TPETRA_DEBUG - // Now that we have what we think is an offset, make sure - // that it really does point to the diagonal entry. Offsets - // are _relative_ to each row, not absolute (for the whole - // (local) graph). - Teuchos::ArrayView lclColInds; - try { - this->getLocalRowView (lclRowInd, lclColInds); - } - catch (...) { - noOtherWeirdness = false; - } - // Don't continue with error checking if the above failed. - if (noOtherWeirdness) { - const size_t numEnt = lclColInds.size (); - if (offset >= numEnt) { - // Offsets are relative to each row, so this means that - // the offset is out of bounds. - allOffsetsCorrect = false; - wrongOffsets.push_back (std::make_pair (lclRowInd, offset)); - } else { - const LO actualLclColInd = lclColInds[offset]; - const GO actualGblColInd = colMap.getGlobalElement (actualLclColInd); - if (actualGblColInd != gblColInd) { - allOffsetsCorrect = false; - wrongOffsets.push_back (std::make_pair (lclRowInd, offset)); - } - } - } + // Now that we have what we think is an offset, make sure + // that it really does point to the diagonal entry. Offsets + // are _relative_ to each row, not absolute (for the whole + // (local) graph). + Teuchos::ArrayView lclColInds; + try { + this->getLocalRowView (lclRowInd, lclColInds); + } + catch (...) { + noOtherWeirdness = false; + } + // Don't continue with error checking if the above failed. + if (noOtherWeirdness) { + const size_t numEnt = lclColInds.size (); + if (offset >= numEnt) { + // Offsets are relative to each row, so this means that + // the offset is out of bounds. + allOffsetsCorrect = false; + wrongOffsets.push_back (std::make_pair (lclRowInd, offset)); + } else { + const LO actualLclColInd = lclColInds[offset]; + const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd); + if (actualGblColInd != gblColInd) { + allOffsetsCorrect = false; + wrongOffsets.push_back (std::make_pair (lclRowInd, offset)); + } + } + } #endif // HAVE_TPETRA_DEBUG - } - else { - offsets(lclRowInd) = Teuchos::OrdinalTraits::invalid (); + } + else { + offsets(lclRowInd) = Tpetra::Details::OrdinalTraits::invalid (); #ifdef HAVE_TPETRA_DEBUG - allDiagEntriesFound = false; + allDiagEntriesFound = false; #endif // HAVE_TPETRA_DEBUG - } + } + } } } -- 2.5.4 (Apple Git-61)