Skip to content

Commit

Permalink
Merge Pull Request #10002 from trilinos/Trilinos/csiefer-oh-gosh-this…
Browse files Browse the repository at this point in the history
…-thing-is-picky

Automatically Merged using Trilinos Pull Request AutoTester
PR Title: Tpetra: Fixing WrappedDualView usage bug
PR Author: csiefer2
  • Loading branch information
trilinos-autotester authored Dec 16, 2021
2 parents 9732a48 + bfe17e7 commit 93bbee4
Show file tree
Hide file tree
Showing 4 changed files with 573 additions and 554 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ int executeInsertGlobalIndicesFESP_(const Teuchos::RCP<const Teuchos::Comm<int>
fe_matrix = rcp(new fe_matrix_type(fe_graph));
rhs = rcp (new fe_multivector_type(domain_map, fe_graph->getImporter(), 1));

Kokkos::View<local_ordinal_type[4][4], deviceType> element_matrix ("element_matrix");
Kokkos::View<local_ordinal_type[4][4], hostType> element_matrix ("element_matrix");
Teuchos::Array<Scalar> element_rhs(4);

Teuchos::Array<global_ordinal_type> column_global_ids(4); // global column ids list
Expand Down Expand Up @@ -376,7 +376,6 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm
auto domain_map = row_map;
auto range_map = row_map;

auto owned_element_to_node_ids = mesh.getOwnedElementToNode().getHostView(Tpetra::Access::ReadOnly);

Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();

Expand All @@ -388,8 +387,8 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm
Teuchos::Array<global_ordinal_type> global_ids_in_row(4);

{
TimeMonitor timerElementLoopGraph
(*TimeMonitor::getNewTimer("1) ElementLoop (Graph)"));
TimeMonitor timerElementLoopGraph(*TimeMonitor::getNewTimer("1) ElementLoop (Graph)"));
auto owned_element_to_node_ids = mesh.getOwnedElementToNode().getHostView(Tpetra::Access::ReadOnly);

// for each element in the mesh...
Tpetra::beginAssembly(*fe_graph);
Expand Down Expand Up @@ -421,12 +420,14 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm
}
}


// Call fillComplete on the fe_graph to 'finalize' it.
{
TimeMonitor timer(*TimeMonitor::getNewTimer("2) FillComplete (Graph)"));
Tpetra::endAssembly(*fe_graph);
}


// Print out verbose information about the fe_graph.
if(opts.verbose) fe_graph->describe(out, Teuchos::VERB_EXTREME);

Expand Down Expand Up @@ -467,31 +468,34 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm
// - sumIntoGlobalValues( 6, [ 2 3 7 6 ], [ -1 0 -1 2 ])
RCP<TimeMonitor> timerElementLoopMemory = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("3.1) ElementLoop (Memory)")));

// Because we're processing elements in parallel, we need storage for all of them
int numOwnedElements = mesh.getNumOwnedElements();
int nperel = mesh.getOwnedElementToNode().extent(1);

RCP<fe_matrix_type> fe_matrix = rcp(new fe_matrix_type(fe_graph));
RCP<fe_multivector_type> rhs =
rcp (new fe_multivector_type(domain_map, fe_graph->getImporter(), 1));


auto localMatrix = fe_matrix->getLocalMatrixDevice();
auto localRHS = rhs->getLocalViewDevice(Tpetra::Access::OverwriteAll);
auto localMap = owned_plus_shared_map->getLocalMap();
auto localColMap = fe_matrix->getColMap()->getLocalMap();

// Because we're processing elements in parallel, we need storage for all of them
int numOwnedElements = mesh.getNumOwnedElements();
int nperel = owned_element_to_node_ids.extent(1);

pair_type alln = pair_type(0,nperel);
scalar_2d_array_type all_element_matrix("all_element_matrix",nperel*numOwnedElements);
scalar_1d_array_type all_element_rhs("all_element_rhs",nperel*numOwnedElements);
local_ordinal_single_view_type all_lcids("all_lids",nperel*numOwnedElements);

timerElementLoopMemory=Teuchos::null;

{
TimeMonitor timerElementLoopMatrix
(*TimeMonitor::getNewTimer ("3.2) ElementLoop (Matrix)"));
TimeMonitor timerElementLoopMatrix(*TimeMonitor::getNewTimer ("3.2) ElementLoop (Matrix)"));
Tpetra::beginAssembly(*fe_matrix,*rhs);

auto owned_element_to_node_ids = mesh.getOwnedElementToNode().getDeviceView(Tpetra::Access::ReadOnly);

// Loop over elements
Tpetra::beginAssembly(*fe_matrix,*rhs);
auto localRHS = rhs->getLocalViewDevice(Tpetra::Access::OverwriteAll);
// Tpetra::beginAssembly(*fe_matrix,*rhs);
Kokkos::parallel_for
("Assemble FE matrix and right-hand side",
Kokkos::RangePolicy<execution_space, int> (0, numOwnedElements),
Expand All @@ -512,14 +516,16 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm
element_lcids(element_node_idx) =
localColMap.getLocalElement (owned_element_to_node_ids (element_gidx, element_node_idx));
}

// For each node (row) on the current element:
// - populate the values array
// - add the values to the fe_matrix.

for (int element_node_idx = 0; element_node_idx < nperel; ++element_node_idx) {
const local_ordinal_type local_row_id =
localMap.getLocalElement (owned_element_to_node_ids (element_gidx, element_node_idx));
auto row_values = Kokkos::subview(element_matrix, element_node_idx, alln);

// Force atomics on sums
for (int col_idx = 0; col_idx < nperel; ++col_idx) {
localMatrix.sumIntoValues (local_row_id, &element_lcids(col_idx), 1,
Expand All @@ -528,21 +534,25 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm
}
Kokkos::atomic_add (&(localRHS(local_row_id,0)), element_rhs[element_node_idx]);
}

});
}


// After the contributions are added, 'finalize' the matrix using fillComplete()
{
TimeMonitor timer(*TimeMonitor::getNewTimer("4) FillComplete (Matrix)"));
Tpetra::endAssembly(*fe_matrix);
}


{
// Global assemble the RHS
TimeMonitor timer(*TimeMonitor::getNewTimer("5) GlobalAssemble (RHS)"));
Tpetra::endAssembly(*rhs);
}


Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Print out fe_matrix details.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,10 @@ MeshDatabase::MeshDatabase(Teuchos::RCP<const Teuchos::Comm<int> > comm,
num_my_nodes *= (myNodeStop_[k]-myNodeStart_[k]);
}


// Generate the owned element ids
ownedElementGlobalIDs_ = global_ordinal_view_type(globalDualViewType("ownedElementGlobalIDs_",num_my_elements));
auto ownedElementGlobalIDs = ownedElementGlobalIDs_.getHostView(Tpetra::Access::ReadWrite);
Kokkos::resize(ownedElementGlobalIDs,num_my_elements);
int ect=0;
for(global_ordinal_type j=myElementStart_[1]; j<myElementStop_[1]; j++) {
for(global_ordinal_type i=myElementStart_[0]; i<myElementStop_[0]; i++) {
Expand All @@ -213,10 +214,12 @@ MeshDatabase::MeshDatabase(Teuchos::RCP<const Teuchos::Comm<int> > comm,
ect++;
}
}



// Generate the owned node ids
ownedNodeGlobalIDs_ = global_ordinal_view_type(globalDualViewType("ownedNodeGlobalIDs_",num_my_nodes));
auto _ownedNodeGlobalIDs = ownedNodeGlobalIDs_.getHostView(Tpetra::Access::ReadWrite);
Kokkos::resize(_ownedNodeGlobalIDs,num_my_nodes);

int nct=0;
for(global_ordinal_type j=myNodeStart_[1]; j<myNodeStop_[1]; j++) {
for(global_ordinal_type i=myNodeStart_[0]; i<myNodeStop_[0]; i++) {
Expand All @@ -226,10 +229,11 @@ MeshDatabase::MeshDatabase(Teuchos::RCP<const Teuchos::Comm<int> > comm,
}
}


// Generate the element-to-node map
// NOTE: Hardwired to QUAD4's. Nodes are ordered exodus-style (counter-clockwise) within an element
ownedElementToNode_ = global_ordinal_2d_array_type(global2DArrayDualViewType("ownedElementToNode_",num_my_elements));
auto _ownedElementToNode = ownedElementToNode_.getHostView(Tpetra::Access::ReadWrite);
Kokkos::resize(_ownedElementToNode,num_my_elements);
int cct=0;
for(global_ordinal_type j=myElementStart_[1]; j<myElementStop_[1]; j++) {
for(global_ordinal_type i=myElementStart_[0]; i<myElementStop_[0]; i++) {
Expand All @@ -248,9 +252,9 @@ MeshDatabase::MeshDatabase(Teuchos::RCP<const Teuchos::Comm<int> > comm,
// NOTE: This only generates a halo for elements where I own at least one node. On the x/y hi sides,
// the highers element does not own all the nodes on that proc. Ergo, no halo in that direction
std::vector<global_ordinal_type> my_ghost_elements;
for(global_ordinal_type j=myElementStart_[1]-1; j<myElementStop_[1]; j++) {
for(global_ordinal_type j=myElementStart_[1]-1; j<myElementStop_[1]+1; j++) {
if(j<0 || j>=globalElements_[1]) continue; // Ignore stuff off the mesh
for(global_ordinal_type i=myElementStart_[0]-1; i<myElementStop_[0]; i++) {
for(global_ordinal_type i=myElementStart_[0]-1; i<myElementStop_[0]+1; i++) {
if(i<0 || i>=globalElements_[0]) continue; // Ignore stuff off the mesh

// Ignore proc interior
Expand All @@ -263,10 +267,10 @@ MeshDatabase::MeshDatabase(Teuchos::RCP<const Teuchos::Comm<int> > comm,
}

// NOTE: This are not recorded in Aztec/Ifpack/ML ordering. Because most apps don't do that.
ghostElementGlobalIDs_ = global_ordinal_view_type(globalDualViewType("ghostElementGlobalIDs_",my_ghost_elements.size()));
ghostElementToNode_ = global_ordinal_2d_array_type(global2DArrayDualViewType("ghostElementToNode_",my_ghost_elements.size()));
auto _ghostElementGlobalIDs = ghostElementGlobalIDs_.getHostView(Tpetra::Access::ReadWrite);
auto _ghostElementToNode = ghostElementToNode_.getHostView(Tpetra::Access::ReadWrite);
Kokkos::resize(_ghostElementGlobalIDs,my_ghost_elements.size());
Kokkos::resize(_ghostElementToNode,my_ghost_elements.size());
for(size_t k=0; k<my_ghost_elements.size(); k++) {
global_ordinal_type i,j, eidx= my_ghost_elements[k];
_ghostElementGlobalIDs(k) = eidx;
Expand All @@ -293,11 +297,13 @@ MeshDatabase::MeshDatabase(Teuchos::RCP<const Teuchos::Comm<int> > comm,
}
}

auto _ghostNodeGlobalIDs = ghostNodeGlobalIDs_.getHostView(Tpetra::Access::ReadWrite);
Kokkos::resize(_ghostNodeGlobalIDs,my_ghost_nodes.size());
for(auto k=my_ghost_nodes.begin(); k!=my_ghost_nodes.end(); k++) {
size_t kk = std::distance(my_ghost_nodes.begin(),k);
ghostNodeGlobalIDs_ = global_ordinal_view_type(globalDualViewType("ghostNodeGlobalIDs_",my_ghost_nodes.size()));
{
auto _ghostNodeGlobalIDs = ghostNodeGlobalIDs_.getHostView(Tpetra::Access::ReadWrite);
for(auto k=my_ghost_nodes.begin(); k!=my_ghost_nodes.end(); k++) {
size_t kk = std::distance(my_ghost_nodes.begin(),k);
_ghostNodeGlobalIDs(kk) = *k;
}
}

initializeOwnedAndGhostNodeGlobalIDs();
Expand All @@ -308,9 +314,8 @@ MeshDatabase::MeshDatabase(Teuchos::RCP<const Teuchos::Comm<int> > comm,
void MeshDatabase::initializeOwnedAndGhostNodeGlobalIDs(void)
{
size_t total_size = getNumOwnedNodes() + getNumGhostNodes();

ownedAndGhostNodeGlobalIDs_ = global_ordinal_view_type(globalDualViewType("ownedAndGhostGlobalIDs_",total_size));
auto _ownedAndGhostNodeGlobalIDs = ownedAndGhostNodeGlobalIDs_.getHostView(Tpetra::Access::ReadWrite);
Kokkos::resize(_ownedAndGhostNodeGlobalIDs, total_size);

{
size_t insert_idx = 0;
Expand All @@ -331,8 +336,8 @@ void MeshDatabase::initializeOwnedAndGhostNodeGlobalIDs(void)
void MeshDatabase::initializeOwnedAndGhostElementGlobalIDs(void)
{
size_t total_size = getNumOwnedElements() + getNumGhostElements();
ownedAndGhostElementGlobalIDs_ = global_ordinal_view_type(globalDualViewType("ownedAndGhostElementIDs_",total_size));
auto _ownedAndGhostElementGlobalIDs = ownedAndGhostElementGlobalIDs_.getHostView(Tpetra::Access::ReadWrite);
Kokkos::resize(_ownedAndGhostElementGlobalIDs, total_size);

{
size_t insert_idx = 0;
Expand All @@ -351,13 +356,13 @@ void MeshDatabase::initializeOwnedAndGhostElementGlobalIDs(void)



void MeshDatabase::print(std::ostream & oss)
void MeshDatabase::print(std::ostream & outstream)
{
std::ostringstream ss;
std::ostringstream ss,oss;
ss<<"["<<MyRank_<<","<<myProcIJ_[0]<<","<<myProcIJ_[1]<<"]";
oss<<ss.str()<<" Global Elements = ["<<globalElements_[0]<<"x"<<globalElements_[1]<<"] Nodes ="<<globalNodes_[0]<<"x"<<globalNodes_[1]<<"]\n";
oss<<ss.str()<<" Stop/Start Elements = ["<<myElementStart_[0]<<","<<myElementStop_[0]<<")x["<<myElementStart_[1]<<","<<myElementStop_[1]<<")\n";
oss<<ss.str()<<" Stop/Start Nodes = ["<<myNodeStart_[0]<<","<<myNodeStop_[0]<<")x["<<myNodeStart_[1]<<","<<myNodeStop_[1]<<")\n";
oss<<ss.str()<<" Global Elements = ["<<globalElements_[0]<<"x"<<globalElements_[1]<<"] Nodes = ["<<globalNodes_[0]<<"x"<<globalNodes_[1]<<"]\n";
oss<<ss.str()<<" Start/Stop Elements = ["<<myElementStart_[0]<<","<<myElementStop_[0]<<")x["<<myElementStart_[1]<<","<<myElementStop_[1]<<")\n";
oss<<ss.str()<<" Start/Stop Nodes = ["<<myNodeStart_[0]<<","<<myNodeStop_[0]<<")x["<<myNodeStart_[1]<<","<<myNodeStop_[1]<<")\n";

oss<<ss.str()<<" Owned Global Elements = ";
{
Expand Down Expand Up @@ -431,7 +436,7 @@ void MeshDatabase::print(std::ostream & oss)
}
}

oss<<std::endl;
outstream<<oss.str()<<std::endl;
}


Expand Down
Loading

0 comments on commit 93bbee4

Please sign in to comment.