forked from trilinos/Trilinos
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge 'trilinos/Trilinos:trilinos/develop' (13b95f6) into 'EM-Plasma/…
…Trilinos:develop' (01a4ee0). * trilinos/develop: Intrepid2: Support Full Exact Sequence in Structured Integration (trilinos#10419) Tpetra: Use numVectors==1 specialization for local parts of norms
- Loading branch information
Showing
37 changed files
with
4,526 additions
and
1,491 deletions.
There are no files selected for viewing
151 changes: 151 additions & 0 deletions
151
packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
// | ||
// GRADGRADStandardAssembly.hpp | ||
// Trilinos | ||
// | ||
// Created by Roberts, Nathan V on 7/2/21. | ||
// | ||
|
||
#ifndef Intrepid2_GRADGRADStandardAssembly_hpp | ||
#define Intrepid2_GRADGRADStandardAssembly_hpp | ||
|
||
#include "JacobianFlopEstimate.hpp" | ||
|
||
/** \file GRADGRADStandardAssembly.hpp | ||
\brief Locally assembles a Poisson matrix -- an array of shape (C,F,F), with formulation (grad e_i, grad e_j), using standard Intrepid2 methods; these do not algorithmically exploit geometric structure. | ||
*/ | ||
|
||
//! Version that uses the classic, generic Intrepid2 paths. | ||
template<class Scalar, class BasisFamily, class PointScalar, int spaceDim, typename DeviceType> | ||
Intrepid2::ScalarView<Scalar,DeviceType> performStandardQuadratureGRADGRAD(Intrepid2::CellGeometry<PointScalar, spaceDim, DeviceType> &geometry, | ||
const int &polyOrder, int worksetSize, | ||
double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) | ||
{ | ||
using ExecutionSpace = typename DeviceType::execution_space; | ||
int numVertices = 1; | ||
for (int d=0; d<spaceDim; d++) | ||
{ | ||
numVertices *= 2; | ||
} | ||
|
||
auto jacobianAndCellMeasureTimer = Teuchos::TimeMonitor::getNewTimer("Jacobians"); | ||
auto fstIntegrateCall = Teuchos::TimeMonitor::getNewTimer("transform + integrate()"); | ||
auto initialSetupTimer = Teuchos::TimeMonitor::getNewTimer("Initial Setup"); | ||
initialSetupTimer->start(); | ||
|
||
using CellTools = Intrepid2::CellTools<DeviceType>; | ||
using FunctionSpaceTools = Intrepid2::FunctionSpaceTools<DeviceType>; | ||
|
||
using namespace Intrepid2; | ||
|
||
using namespace std; | ||
// dimensions of the returned view are (C,F,F) | ||
auto fs = FUNCTION_SPACE_HGRAD; | ||
|
||
shards::CellTopology cellTopo = geometry.cellTopology(); | ||
|
||
auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); | ||
|
||
int numFields = basis->getCardinality(); | ||
int numCells = geometry.numCells(); | ||
|
||
if (worksetSize > numCells) worksetSize = numCells; | ||
|
||
// local stiffness matrices: | ||
ScalarView<Scalar,DeviceType> cellStiffness("cell stiffness matrices",numCells,numFields,numFields); | ||
|
||
auto cubature = DefaultCubatureFactory::create<DeviceType>(cellTopo,polyOrder*2); | ||
int numPoints = cubature->getNumPoints(); | ||
ScalarView<PointScalar,DeviceType> cubaturePoints("cubature points",numPoints,spaceDim); | ||
ScalarView<double,DeviceType> cubatureWeights("cubature weights", numPoints); | ||
|
||
cubature->getCubature(cubaturePoints, cubatureWeights); | ||
|
||
const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numPoints, numVertices); | ||
const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numPoints); | ||
const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numPoints); | ||
|
||
// Allocate some intermediate containers | ||
ScalarView<Scalar,DeviceType> basisValues ("basis values", numFields, numPoints ); | ||
ScalarView<Scalar,DeviceType> basisGradValues("basis grad values", numFields, numPoints, spaceDim); | ||
|
||
ScalarView<Scalar,DeviceType> transformedGradValues("transformed grad values", worksetSize, numFields, numPoints, spaceDim); | ||
ScalarView<Scalar,DeviceType> transformedWeightedGradValues("transformed weighted grad values", worksetSize, numFields, numPoints, spaceDim); | ||
|
||
basis->getValues(basisValues, cubaturePoints, OPERATOR_VALUE ); | ||
basis->getValues(basisGradValues, cubaturePoints, OPERATOR_GRAD ); | ||
|
||
const int numNodesPerCell = geometry.numNodesPerCell(); | ||
ScalarView<PointScalar,DeviceType> expandedCellNodes("expanded cell nodes",numCells,numNodesPerCell,spaceDim); | ||
Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0,numCells), | ||
KOKKOS_LAMBDA (const int &cellOrdinal) { | ||
for (int nodeOrdinal=0; nodeOrdinal<numNodesPerCell; nodeOrdinal++) | ||
{ | ||
for (int d=0; d<spaceDim; d++) | ||
{ | ||
expandedCellNodes(cellOrdinal,nodeOrdinal,d) = geometry(cellOrdinal,nodeOrdinal,d); | ||
} | ||
} | ||
}); | ||
|
||
ScalarView<Scalar,DeviceType> cellMeasures("cell measures", worksetSize, numPoints); | ||
ScalarView<Scalar,DeviceType> jacobianDeterminant("jacobian determinant", worksetSize, numPoints); | ||
ScalarView<Scalar,DeviceType> jacobian("jacobian", worksetSize, numPoints, spaceDim, spaceDim); | ||
ScalarView<Scalar,DeviceType> jacobianInverse("jacobian inverse", worksetSize, numPoints, spaceDim, spaceDim); | ||
|
||
initialSetupTimer->stop(); | ||
|
||
transformIntegrateFlopCount = 0; | ||
jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself | ||
jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse | ||
jacobianCellMeasureFlopCount += numCells * flopsPerJacobianDetPerCell; // determinant | ||
jacobianCellMeasureFlopCount += numCells * numPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) | ||
|
||
int cellOffset = 0; | ||
while (cellOffset < numCells) | ||
{ | ||
int startCell = cellOffset; | ||
int numCellsInWorkset = (cellOffset + worksetSize - 1 < numCells) ? worksetSize : numCells - startCell; | ||
|
||
std::pair<int,int> cellRange = {startCell, startCell+numCellsInWorkset}; | ||
auto cellWorkset = Kokkos::subview(expandedCellNodes, cellRange, Kokkos::ALL(), Kokkos::ALL()); | ||
|
||
if (numCellsInWorkset != worksetSize) | ||
{ | ||
Kokkos::resize(jacobian, numCellsInWorkset, numPoints, spaceDim, spaceDim); | ||
Kokkos::resize(jacobianInverse, numCellsInWorkset, numPoints, spaceDim, spaceDim); | ||
Kokkos::resize(jacobianDeterminant, numCellsInWorkset, numPoints); | ||
Kokkos::resize(cellMeasures, numCellsInWorkset, numPoints); | ||
Kokkos::resize(transformedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); | ||
Kokkos::resize(transformedWeightedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); | ||
} | ||
jacobianAndCellMeasureTimer->start(); | ||
CellTools::setJacobian(jacobian, cubaturePoints, cellWorkset, cellTopo); // accounted for outside loop, as numCells * flopsPerJacobianPerCell. | ||
CellTools::setJacobianInv(jacobianInverse, jacobian); | ||
CellTools::setJacobianDet(jacobianDeterminant, jacobian); | ||
|
||
FunctionSpaceTools::computeCellMeasure(cellMeasures, jacobianDeterminant, cubatureWeights); | ||
ExecutionSpace().fence(); | ||
jacobianAndCellMeasureTimer->stop(); | ||
|
||
// because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. | ||
fstIntegrateCall->start(); | ||
FunctionSpaceTools::HGRADtransformGRAD(transformedGradValues, jacobianInverse, basisGradValues); | ||
transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. | ||
FunctionSpaceTools::multiplyMeasure(transformedWeightedGradValues, cellMeasures, transformedGradValues); | ||
transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedGradValues: one flop for each. | ||
|
||
auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); | ||
|
||
FunctionSpaceTools::integrate(cellStiffnessSubview, transformedGradValues, transformedWeightedGradValues); | ||
ExecutionSpace().fence(); | ||
fstIntegrateCall->stop(); | ||
|
||
transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numFields) * double(numPoints * spaceDim * 2 - 1); // 2: one multiply, one add per (P,D) entry in the contraction. | ||
|
||
cellOffset += worksetSize; | ||
} | ||
// std::cout << "standard integration, approximateFlopCount: " << approximateFlopCount << std::endl; | ||
return cellStiffness; | ||
} | ||
|
||
#endif /* GRADGRADStandardAssembly_h */ |
153 changes: 153 additions & 0 deletions
153
packages/intrepid2/assembly-examples/GRADGRADStructuredAssembly.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
// | ||
// GRADGRADStructuredAssembly.hpp | ||
// Trilinos | ||
// | ||
// Created by Roberts, Nathan V on 7/2/21. | ||
// | ||
|
||
#ifndef GRADGRADStructuredAssembly_h | ||
#define GRADGRADStructuredAssembly_h | ||
|
||
#include "JacobianFlopEstimate.hpp" | ||
|
||
/** \file GRADGRADStructuredAssembly.hpp | ||
\brief Locally assembles a Poisson matrix -- an array of shape (C,F,F), with formulation (grad e_i, grad e_j), using "structured" Intrepid2 methods; these algorithmically exploit geometric structure as expressed in the provided CellGeometry. | ||
*/ | ||
|
||
//! Version that takes advantage of new structured integration support, including sum factorization. | ||
template<class Scalar, class BasisFamily, class PointScalar, int spaceDim, typename DeviceType> | ||
Intrepid2::ScalarView<Scalar,DeviceType> performStructuredQuadratureGRADGRAD(Intrepid2::CellGeometry<PointScalar, spaceDim, DeviceType> &geometry, const int &polyOrder, const int &worksetSize, | ||
double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) | ||
{ | ||
using namespace Intrepid2; | ||
|
||
using ExecutionSpace = typename DeviceType::execution_space; | ||
|
||
int numVertices = 1; | ||
for (int d=0; d<spaceDim; d++) | ||
{ | ||
numVertices *= 2; | ||
} | ||
|
||
auto initialSetupTimer = Teuchos::TimeMonitor::getNewTimer("Initial Setup"); | ||
initialSetupTimer->start(); | ||
using namespace std; | ||
using FunctionSpaceTools = FunctionSpaceTools<DeviceType>; | ||
using IntegrationTools = IntegrationTools<DeviceType>; | ||
// dimensions of the returned view are (C,F,F) | ||
auto fs = FUNCTION_SPACE_HGRAD; | ||
|
||
shards::CellTopology cellTopo = geometry.cellTopology(); | ||
|
||
auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); | ||
|
||
int numFields = basis->getCardinality(); | ||
int numCells = geometry.numCells(); | ||
|
||
// local stiffness matrix: | ||
ScalarView<Scalar,DeviceType> cellStiffness("cell stiffness matrices",numCells,numFields,numFields); | ||
|
||
auto cubature = DefaultCubatureFactory::create<DeviceType>(cellTopo,polyOrder*2); | ||
auto tensorCubatureWeights = cubature->allocateCubatureWeights(); | ||
TensorPoints<PointScalar,DeviceType> tensorCubaturePoints = cubature->allocateCubaturePoints(); | ||
|
||
cubature->getCubature(tensorCubaturePoints, tensorCubatureWeights); | ||
|
||
EOperator op = OPERATOR_GRAD; | ||
BasisValues<Scalar,DeviceType> gradientValues = basis->allocateBasisValues(tensorCubaturePoints, op); | ||
basis->getValues(gradientValues, tensorCubaturePoints, op); | ||
|
||
// goal here is to do a weighted Poisson; i.e. (f grad u, grad v) on each cell | ||
|
||
int cellOffset = 0; | ||
|
||
auto jacobianAndCellMeasureTimer = Teuchos::TimeMonitor::getNewTimer("Jacobians"); | ||
auto fstIntegrateCall = Teuchos::TimeMonitor::getNewTimer("transform + integrate()"); | ||
|
||
Data<PointScalar,DeviceType> jacobian = geometry.allocateJacobianData(tensorCubaturePoints, 0, worksetSize); | ||
Data<PointScalar,DeviceType> jacobianDet = CellTools<DeviceType>::allocateJacobianDet(jacobian); | ||
Data<PointScalar,DeviceType> jacobianInv = CellTools<DeviceType>::allocateJacobianInv(jacobian); | ||
TensorData<PointScalar,DeviceType> cellMeasures = geometry.allocateCellMeasure(jacobianDet, tensorCubatureWeights); | ||
|
||
// lazily-evaluated transformed gradient values (temporary to allow integralData allocation) | ||
auto transformedGradientValuesTemp = FunctionSpaceTools::getHGRADtransformGRAD(jacobianInv, gradientValues); | ||
auto integralData = IntegrationTools::allocateIntegralData(transformedGradientValuesTemp, cellMeasures, transformedGradientValuesTemp); | ||
|
||
const int numPoints = jacobian.getDataExtent(1); // data extent will be 1 for affine, numPoints for other cases | ||
|
||
// TODO: make the below determination accurate for diagonal/block-diagonal cases… (right now, will overcount) | ||
const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numPoints, numVertices); | ||
const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numPoints); | ||
const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numPoints); | ||
|
||
transformIntegrateFlopCount = 0; | ||
jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself | ||
jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse | ||
jacobianCellMeasureFlopCount += numCells * flopsPerJacobianDetPerCell; // determinant | ||
jacobianCellMeasureFlopCount += numCells * numPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) | ||
|
||
auto refData = geometry.getJacobianRefData(tensorCubaturePoints); | ||
|
||
initialSetupTimer->stop(); | ||
while (cellOffset < numCells) | ||
{ | ||
int startCell = cellOffset; | ||
int numCellsInWorkset = (cellOffset + worksetSize - 1 < numCells) ? worksetSize : numCells - startCell; | ||
int endCell = numCellsInWorkset + startCell; | ||
|
||
jacobianAndCellMeasureTimer->start(); | ||
if (numCellsInWorkset != worksetSize) | ||
{ | ||
const int CELL_DIM = 0; // first dimension corresponds to cell | ||
jacobian.setExtent( CELL_DIM, numCellsInWorkset); | ||
jacobianDet.setExtent( CELL_DIM, numCellsInWorkset); | ||
jacobianInv.setExtent( CELL_DIM, numCellsInWorkset); | ||
integralData.setExtent(CELL_DIM, numCellsInWorkset); | ||
|
||
// cellMeasures is a TensorData object with separateFirstComponent_ = true; the below sets the cell dimension… | ||
cellMeasures.setFirstComponentExtentInDimension0(numCellsInWorkset); | ||
} | ||
|
||
geometry.setJacobian(jacobian, tensorCubaturePoints, refData, startCell, endCell); | ||
CellTools<DeviceType>::setJacobianDet(jacobianDet, jacobian); | ||
CellTools<DeviceType>::setJacobianInv(jacobianInv, jacobian); | ||
|
||
// lazily-evaluated transformed gradient values: | ||
auto transformedGradientValues = FunctionSpaceTools::getHGRADtransformGRAD(jacobianInv, gradientValues); | ||
|
||
geometry.computeCellMeasure(cellMeasures, jacobianDet, tensorCubatureWeights); | ||
ExecutionSpace().fence(); | ||
jacobianAndCellMeasureTimer->stop(); | ||
|
||
bool sumInto = false; | ||
double approximateFlopCountIntegrateWorkset = 0; | ||
fstIntegrateCall->start(); | ||
IntegrationTools::integrate(integralData, transformedGradientValues, cellMeasures, transformedGradientValues, sumInto, &approximateFlopCountIntegrateWorkset); | ||
ExecutionSpace().fence(); | ||
fstIntegrateCall->stop(); | ||
|
||
// copy into cellStiffness container. (Alternately, do something like allocateIntegralData, but outside this loop, and take a subview to construct the workset integralData.) | ||
if (integralData.getUnderlyingViewRank() == 3) | ||
{ | ||
std::pair<int,int> cellRange = {startCell, endCell}; | ||
auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); | ||
Kokkos::deep_copy(cellStiffnessSubview, integralData.getUnderlyingView3()); | ||
} | ||
else // underlying view rank is 2; copy to each cell in destination stiffness matrix | ||
{ | ||
auto integralView2 = integralData.getUnderlyingView2(); | ||
auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{numCellsInWorkset,numFields,numFields}); | ||
Kokkos::parallel_for("copy uniform data to expanded container", policy, | ||
KOKKOS_LAMBDA (const int &cellOrdinal, const int &leftFieldOrdinal, const int &rightFieldOrdinal) { | ||
cellStiffness(startCell + cellOrdinal, leftFieldOrdinal, rightFieldOrdinal) = integralView2(leftFieldOrdinal,rightFieldOrdinal); | ||
}); | ||
} | ||
|
||
transformIntegrateFlopCount += approximateFlopCountIntegrateWorkset; | ||
|
||
cellOffset += worksetSize; | ||
} | ||
return cellStiffness; | ||
} | ||
|
||
#endif /* GRADGRADStructuredAssembly_h */ |
Oops, something went wrong.