From e2d256ca31df4bc62f2ba33059ca8589eee28121 Mon Sep 17 00:00:00 2001
From: crtrott
Date: Wed, 2 Dec 2015 22:58:56 -0700
Subject: [PATCH] Tpetra: address issue #41 for threading CrsMatrix::getLocalDiagCopy

@trilinos/tpetra
This threads the function for host execution space.
Was identified as serial bottleneck for a Nalu run.
---
 packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
index 9d682fcb84cd..20c4d7f0827b 100644
--- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
+++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
@@ -2879,7 +2879,7 @@ namespace Tpetra {

     // Find the diagonal entries and put them in lclVecHost1d.
     const size_t myNumRows = getNodeNumRows ();
-    for (size_t i = 0; i < myNumRows; ++i) {
+    Kokkos::parallel_for ( Kokkos::RangePolicy( 0, myNumRows), [&] (const size_t& i) {
       lclVecHost1d(i) = STS::zero (); // default value if no diag entry
       if (offsets[i] != Teuchos::OrdinalTraits::invalid ()) {
         ArrayView ind;
@@ -2890,7 +2890,7 @@ namespace Tpetra {
         this->getLocalRowView (i, ind, val);
         lclVecHost1d(i) = static_cast (val[offsets[i]]);
       }
-    }
+    });
     lclVec.template sync (); // sync changes back to device
   }
