Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Porting some optimization cases to run on GPU without UVM #1086

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
1 change: 0 additions & 1 deletion src/Albany_Application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3144,7 +3144,6 @@ Application::loadWorksetBucketInfo(PHAL::Workset& workset, const int& ws,
workset.EBName = wsEBNames[ws];
workset.wsIndex = ws;

workset.local_Vp.resize(workset.numCells);

workset.savedMDFields = phxSetup->get_saved_fields(evalName);

Expand Down
2 changes: 1 addition & 1 deletion src/PHAL_Workset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ struct Workset
Teuchos::RCP<Albany::DistributedParameterLibrary> distParamLib;
std::string dist_param_deriv_name;
bool transpose_dist_param_deriv;
Teuchos::ArrayRCP<Teuchos::ArrayRCP<Teuchos::ArrayRCP<double>>> local_Vp;
Kokkos::View<double***, PHX::Device> local_Vp;

Teuchos::ArrayRCP<Teuchos::ArrayRCP<const double*>> wsCoords;
std::string EBName;
Expand Down
60 changes: 60 additions & 0 deletions src/disc/Albany_DOFManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ void DOFManager::build ()
// 4. Possibly restrict the DOFs list
restrict (part_name());

// Allocate/populate nodes view
convertVecsToViews (m_subcell_closures, m_subcell_closures_view, "m_subcell_closures_view");
if (m_topo_dim>2 && m_top_bot_well_defined)
convertVecsToViews (m_side_closure_orderd_as_side, m_side_closure_orderd_as_side_view, "m_side_closure_orderd_as_side_view");

// Done
m_built = true;
}
Expand Down Expand Up @@ -161,13 +166,30 @@ getGIDFieldOffsetsSubcell (int fieldNum,
return m_subcell_closures[fieldNum][subcell_dim][subcell_pos];
}

Kokkos::Subview<Kokkos::View<int****,PHX::Device>,int,int,int,decltype(Kokkos::ALL)>
DOFManager::
getGIDFieldOffsetsSubcellKokkos (int fieldNum,
int subcell_dim,
int subcell_pos) const
{
return Kokkos::subview(m_subcell_closures_view,fieldNum,subcell_dim,subcell_pos,Kokkos::ALL);
}


const std::vector<int>&
DOFManager::
getGIDFieldOffsetsSide (int fieldNum, int side) const
{
return getGIDFieldOffsetsSubcell(fieldNum,m_topo_dim-1,side);
}

Kokkos::Subview<Kokkos::View<int****,PHX::Device>,int,int,int,decltype(Kokkos::ALL)>
DOFManager::
getGIDFieldOffsetsSideKokkos (int fieldNum, int side) const
{
return getGIDFieldOffsetsSubcellKokkos(fieldNum,m_topo_dim-1,side);
}

const std::vector<int>&
DOFManager::
getGIDFieldOffsetsSide (int fieldNum, int side, const int orderAsInSide) const
Expand All @@ -191,6 +213,44 @@ getGIDFieldOffsetsSide (int fieldNum, int side, const int orderAsInSide) const
return m_side_closure_orderd_as_side[fieldNum][side][orderAsInSide];
}

Kokkos::Subview<Kokkos::View<int****,PHX::Device>,int,int,int,decltype(Kokkos::ALL)>
DOFManager::
getGIDFieldOffsetsSideKokkos(int fieldNum, int side, const int orderAsInSide) const
{
return Kokkos::subview(m_side_closure_orderd_as_side_view,fieldNum,side,orderAsInSide,Kokkos::ALL);
}

void
DOFManager::
convertVecsToViews (vec4int& vec, Kokkos::View<int****, PHX::Device>& view, std::string view_name)
{
size_t max_size_i = vec.size();
size_t max_size_j = 0; size_t max_size_k = 0; size_t max_size_l = 0;
for (size_t i=0; i < vec.size(); ++i) {
max_size_j = std::max(max_size_j, vec[i].size());
for (size_t j=0; j < vec[i].size(); ++j) {
max_size_k = std::max(max_size_k, vec[i][j].size());
for (size_t k=0; k < vec[i][j].size(); ++k) {
max_size_l = std::max(max_size_l, vec[i][j][k].size());
}
}
}

view = Kokkos::View<int****,PHX::Device>(view_name,
max_size_i,
max_size_j,
max_size_k,
max_size_l);

const auto view_host = Kokkos::create_mirror_view(view);
for (size_t i=0; i < vec.size(); ++i)
for (size_t j=0; j < vec[i].size(); ++j)
for (size_t k=0; k < vec[i][j].size(); ++k)
for (size_t l=0; l < vec[i][j][k].size(); ++l)
view_host(i,j,k,l) = vec[i][j][k][l];
Kokkos::deep_copy(view,view_host);
}

void DOFManager::
restrict (const std::string& sub_part_name)
{
Expand Down
12 changes: 12 additions & 0 deletions src/disc/Albany_DOFManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,24 @@ class DOFManager : public panzer::DOFManager {
using panzer::DOFManager::getGIDFieldOffsets_closure;
const std::vector<int>&
getGIDFieldOffsetsSubcell (int fieldNum, int subcell_dim, int subcell_pos) const;
Kokkos::Subview<Kokkos::View<int****,PHX::Device>,int,int,int,decltype(Kokkos::ALL)>
getGIDFieldOffsetsSubcellKokkos (int fieldNum, int subcell_dim, int subcell_pos) const;

// Special case of the above, for subcell being the top or bottom side
// NOTE: only for quad/hexa/wedge
const std::vector<int>&
getGIDFieldOffsetsSide (int fieldNum, int side) const;
Kokkos::Subview<Kokkos::View<int****,PHX::Device>,int,int,int,decltype(Kokkos::ALL)>
getGIDFieldOffsetsSideKokkos (int fieldNum, int side) const;

// If side!=orderedAsInSide, this version returns side offsets ordered
// in such a way that off[i] on side=$side is directly above/below
// off[i] on side=$orderAsInSide.
// This makes sense ONLY IF $side and $orderAsInSide are both in {top,bot}
const std::vector<int>&
getGIDFieldOffsetsSide (int fieldNum, int side, int orderAsInSide) const;
Kokkos::Subview<Kokkos::View<int****,PHX::Device>,int,int,int,decltype(Kokkos::ALL)>
getGIDFieldOffsetsSideKokkos (int fieldNum, int side, int orderAsInSide) const;

const std::string& part_name () const {
return m_part_name;
Expand Down Expand Up @@ -123,10 +129,15 @@ class DOFManager : public panzer::DOFManager {
DualView<const int**> m_elem_dof_lids;

using vec4int = std::vector<std::vector<std::vector<std::vector<int>>>>;

// Convert DOF vec4ints to Kokkos Views for device access
void convertVecsToViews (vec4int& vec, Kokkos::View<int****, PHX::Device>& view, std::string view_name);

// m_subcell_closures[ifield][dim][ord] is the vector of the offsets of
// field $ifield on the $ord-th subcell of dimension $dim. More precisely,
// it's the closure of all offsets on all entities belonging to that subcell
vec4int m_subcell_closures;
Kokkos::View<int****, PHX::Device> m_subcell_closures_view;

// Shortcut for location of top/bot sides in the cell list of sides.
bool m_top_bot_well_defined = false;
Expand All @@ -145,6 +156,7 @@ class DOFManager : public panzer::DOFManager {
// dof at offset m_subcell_closures[F][side_dim][top]
// NOTE: this
vec4int m_side_closure_orderd_as_side;
Kokkos::View<int****, PHX::Device> m_side_closure_orderd_as_side_view;

Teuchos::RCP<ConnManager> m_conn_mgr;

Expand Down
1 change: 0 additions & 1 deletion src/disc/Albany_LayeredMeshNumbering.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ struct LayeredMeshNumbering {
return layerOrd ? 1 : numLayers;
}


T getColumnId (const T id) const {
return layerOrd ? id % numHorizEntities : id / numLayers;
}
Expand Down
8 changes: 4 additions & 4 deletions src/evaluators/bc/PHAL_Neumann_Def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,8 @@ evaluateFields(typename Traits::EvalData workset)

this->evaluateNeumannContribution(workset);

const auto& local_Vp = workset.local_Vp;

constexpr auto ALL = Kokkos::ALL();
if (trans) {
const int neq = workset.numEqs;
Expand All @@ -979,7 +981,6 @@ evaluateFields(typename Traits::EvalData workset)
const auto elem_lids = workset.disc->getElementLIDs_host(workset.wsIndex);

for (size_t cell=0; cell<workset.numCells; ++cell) {
const auto& local_Vp = workset.local_Vp[cell];
const int num_deriv = local_Vp.size()/neq;
const auto elem_LID = elem_lids(cell);
const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL);
Expand All @@ -992,7 +993,7 @@ evaluateFields(typename Traits::EvalData workset)
for (int node = 0; node < this->numNodes; ++node) {
for (int dim = 0; dim < this->numDOFsSet; ++dim){
int eq = this->offset[dim];
val += this->neumann(cell, node, dim).dx(i)*local_Vp[node*neq+eq][col];
val += this->neumann(cell, node, dim).dx(i)*local_Vp(cell,node*neq+eq,col);
}
}
fpV_nonconst2dView[col][row] += val;
Expand All @@ -1007,7 +1008,6 @@ evaluateFields(typename Traits::EvalData workset)
const auto& offsets = this->fields_offsets;
for (std::size_t cell=0; cell < workset.numCells; ++cell ) {
const auto elem_LID = elem_lids(cell);
const auto& local_Vp = workset.local_Vp[cell];
const int num_deriv = local_Vp.size();

const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL);
Expand All @@ -1018,7 +1018,7 @@ evaluateFields(typename Traits::EvalData workset)
for (int col=0; col<num_cols; col++) {
double val = 0.0;
for (int i=0; i<num_deriv; ++i) {
val += this->neumann(cell, node, dim).dx(i)*local_Vp[i][col];
val += this->neumann(cell, node, dim).dx(i)*local_Vp(cell,i,col);
}
fpV_nonconst2dView[col][row] += val;
}
Expand Down
3 changes: 3 additions & 0 deletions src/evaluators/gather/PHAL_GatherScalarNodalParameter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ class GatherScalarNodalParameter<PHAL::AlbanyTraits::DistParamDeriv,Traits> :
void evaluateFields(typename Traits::EvalData d);
private:
typedef typename PHAL::AlbanyTraits::DistParamDeriv::ParamScalarT ParamScalarT;
protected:
using ExecutionSpace = typename PHX::Device::execution_space;
using RangePolicy = Kokkos::RangePolicy<ExecutionSpace>;
};


Expand Down
99 changes: 63 additions & 36 deletions src/evaluators/gather/PHAL_GatherScalarNodalParameter_Def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,17 +191,19 @@ evaluateFields(typename Traits::EvalData workset)

// Distributed parameter vector
const auto p = workset.distParamLib->get(this->param_name);
const auto p_data = Albany::getLocalData(p->overlapped_vector().getConst());
const auto p_data = Albany::getDeviceData(p->overlapped_vector().getConst());

const auto Vp = workset.Vp;
const auto Vp_data = !Vp.is_null() ? Albany::getLocalData(Vp) : Teuchos::null;
Albany::ThyraMVDeviceView<const ST> Vp_data;
if (!Vp.is_null()) Vp_data = Albany::getDeviceData(Vp);

// Parameter/solution/nodes dof numbering info
const auto dof_mgr = workset.disc->getDOFManager();
const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().host();
const auto p_elem_dof_lids = p->get_dof_mgr()->elem_dof_lids().dev();

const auto ws = workset.wsIndex;
const auto elem_lids = workset.disc->getElementLIDs_host(ws);
const auto ws_elem_lids = workset.disc->getWsElementLIDs().dev();
const auto elem_lids = Kokkos::subview(ws_elem_lids,ws,Kokkos::ALL());

// Are we differentiating w.r.t. this parameter?
const bool is_active = (workset.dist_param_deriv_name == this->param_name);
Expand All @@ -211,58 +213,76 @@ evaluateFields(typename Traits::EvalData workset)
const int neq = dof_mgr->getNumFields();
const int num_deriv = this->numNodes;
bool trans = workset.transpose_dist_param_deriv;
const auto elem_dof_lids = dof_mgr->elem_dof_lids().host();
for (std::size_t cell=0; cell<workset.numCells; ++cell) {
const auto elem_dof_lids = dof_mgr->elem_dof_lids().dev();

Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells),
KOKKOS_CLASS_LAMBDA(const int& cell) {
const auto elem_LID = elem_lids(cell);
const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL);
for (int node=0; node<num_deriv; ++node) {
const LO lid = p_dof_lids(node);

// Initialize Fad type for parameter value
const auto p_val = lid>=0 ? p_data[lid] : 0;
const auto p_val = lid>=0 ? p_data(lid) : 0;
ParamScalarT v(num_deriv, node, p_val);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work on device, and is it performant? With certain fads, doesn't it allocate memory on device at every call?

this->val(cell,node) = v;
}
});

if (Vp != Teuchos::null) {
const int num_cols = Vp->domain()->dim();
const int num_dofs = elem_dof_lids.extent(1);
if (trans) {
workset.local_Vp = Kokkos::View<double***, PHX::Device>("local_Vp",workset.numCells,num_dofs,num_cols);
} else {
workset.local_Vp = Kokkos::View<double***, PHX::Device>("local_Vp",workset.numCells,num_deriv,num_cols);
}

if (Vp != Teuchos::null) {
const int num_cols = Vp->domain()->dim();
const auto& local_Vp = workset.local_Vp;

auto& local_Vp = workset.local_Vp[cell];
if (trans) {
for (int eq=0; eq<neq; ++eq) {
const auto& offsets = dof_mgr->getGIDFieldOffsetsKokkos(eq);
const int num_offsets = offsets.size();

if (trans) {
auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL);
// const auto& offsets = this->m_sol_fields_offsets;
local_Vp.resize(dof_lids.size());
for (int eq=0; eq<neq; ++eq) {
const auto& offsets = dof_mgr->getGIDFieldOffsets(eq);
for (const auto o : offsets) {
local_Vp[o].resize(num_cols);
Kokkos::parallel_for(this->getName()+"_transvp",RangePolicy(0,workset.numCells),
KOKKOS_CLASS_LAMBDA(const int& cell) {
const auto elem_LID = elem_lids(cell);
const auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL);
for (int i=0; i < num_offsets; ++i) {
const auto o = offsets(i);
const LO lid = dof_lids(o);
for (int col=0; col<num_cols; ++col)
local_Vp[o][col] = Vp_data[col][lid];
local_Vp(cell,o,col) = Vp_data(lid,col);
}
}
} else {
local_Vp.resize(num_deriv);
});
}
}
else {
Kokkos::parallel_for(this->getName()+"_notransvp",RangePolicy(0,workset.numCells),
KOKKOS_CLASS_LAMBDA(const int& cell) {
const auto elem_LID = elem_lids(cell);
const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL);
for (int node=0; node<num_deriv; ++node) {
const LO lid = p_dof_lids(node);
local_Vp[node].resize(num_cols);
for (int col=0; col<num_cols; ++col)
local_Vp[node][col] = lid>=0 ? Vp_data[col][lid] : 0;
local_Vp(cell,node,col) = lid>=0 ? Vp_data(lid,col) : 0;
}
}
});
}
}
} else {
// If not active, just set the parameter value in the phalanx field
for (std::size_t cell=0; cell < workset.numCells; ++cell ) {
const int num_nodes = this->numNodes;
Kokkos::parallel_for(this->getName(),RangePolicy(0,workset.numCells),
KOKKOS_CLASS_LAMBDA(const int& cell) {
const auto elem_LID = elem_lids(cell);
const auto p_dof_lids = Kokkos::subview(p_elem_dof_lids,elem_LID,ALL);
for (int node=0; node<this->numNodes; ++node) {
for (int node=0; node<num_nodes; ++node) {
const LO lid = p_dof_lids(node);
this->val(cell,node) = lid>=0 ? p_data[lid] : 0;
this->val(cell,node) = lid>=0 ? p_data(lid) : 0;
}
}
});
}
}

Expand Down Expand Up @@ -330,6 +350,18 @@ evaluateFields(typename Traits::EvalData workset)
const int neq = sol_dof_mgr->getNumFields();
const int num_deriv = this->numNodes;
const bool trans = workset.transpose_dist_param_deriv;

if (Vp != Teuchos::null) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indentation seems off here.

const int num_cols = workset.Vp->domain()->dim();
if (trans) {
const int num_dofs = elem_dof_lids.extent(1);
workset.local_Vp = Kokkos::View<double***, PHX::Device>("local_Vp",workset.numCells,num_dofs,num_cols);
} else {
workset.local_Vp = Kokkos::View<double***, PHX::Device>("local_Vp",workset.numCells,num_deriv,num_cols);
}
}
const auto& local_Vp = workset.local_Vp;

for (std::size_t cell=0; cell<workset.numCells; ++cell) {
const auto elem_LID = elem_lids(cell);
const auto basal_elem_LID = layers_data->getColumnId(elem_LID);
Expand All @@ -349,27 +381,22 @@ evaluateFields(typename Traits::EvalData workset)
if (Vp != Teuchos::null) {
const int num_cols = workset.Vp->domain()->dim();

auto& local_Vp = workset.local_Vp[cell];
if (trans) {
auto dof_lids = Kokkos::subview(elem_dof_lids,elem_LID,ALL);
local_Vp.resize(dof_lids.size());
for (int eq=0; eq<neq; ++eq) {
const auto& sol_offsets = sol_dof_mgr->getGIDFieldOffsets(eq);
for (const auto o : sol_offsets) {
local_Vp[o].resize(num_cols);
const LO lid = dof_lids(o);
for (int col=0; col<num_cols; ++col)
local_Vp[o][col] = Vp_data[col][lid];
local_Vp(cell,o,col) = Vp_data[col][lid];
}
}
} else {
local_Vp.resize(num_deriv);
for (int node2d=0; node2d<num_nodes_2d; ++node2d) {
const LO p_lid = p_elem_dof_lids(param_elem_LID,offsets_p[node2d]);
for (auto node : {offsets_bot[node2d], offsets_top[node2d]}) {
local_Vp[node].resize(num_cols);
for (int col=0; col<num_cols; ++col) {
local_Vp[node][col] = p_lid>=0 ? Vp_data[col][p_lid] : 0;
local_Vp(cell,node,col) = p_lid>=0 ? Vp_data[col][p_lid] : 0;
}
}
}
Expand Down
Loading