Skip to content

Commit

Permalink
Tpetra: address issue #41 for threading CrsMatrix::getLocalDiagCopy
Browse files Browse the repository at this point in the history
@trilinos/tpetra

This threads the function for the host execution space. It was identified as
a serial bottleneck in a Nalu run.
  • Loading branch information
crtrott committed Dec 3, 2015
1 parent 8b08f3c commit e2d256c
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2879,7 +2879,7 @@ namespace Tpetra {

// Find the diagonal entries and put them in lclVecHost1d.
const size_t myNumRows = getNodeNumRows ();
for (size_t i = 0; i < myNumRows; ++i) {
Kokkos::parallel_for ( Kokkos::RangePolicy<host_execution_space>( 0, myNumRows), [&] (const size_t& i) {
lclVecHost1d(i) = STS::zero (); // default value if no diag entry
if (offsets[i] != Teuchos::OrdinalTraits<size_t>::invalid ()) {
ArrayView<const LocalOrdinal> ind;
Expand All @@ -2890,7 +2890,7 @@ namespace Tpetra {
this->getLocalRowView (i, ind, val);
lclVecHost1d(i) = static_cast<impl_scalar_type> (val[offsets[i]]);
}
}
});
lclVec.template sync<execution_space> (); // sync changes back to device
}

Expand Down

0 comments on commit e2d256c

Please sign in to comment.