Skip to content

Commit

Permalink
Small optimizations for Update System Variables calls for masses and …
Browse files Browse the repository at this point in the history
…springs.

The result is a 10% performance improvement on CPU for the Floating Platform case,
largely by reducing overhead associated with copying the entire masses/springs objects
in the lambda capture.
  • Loading branch information
ddement committed Jan 25, 2025
1 parent 0bb6f70 commit acfbeb7
Show file tree
Hide file tree
Showing 13 changed files with 311 additions and 178 deletions.
37 changes: 26 additions & 11 deletions src/step/assemble_inertia_matrix_masses.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,38 @@

namespace openturbine {

namespace masses {

struct AssembleMassesInertia {
double beta_prime;
double gamma_prime;
Kokkos::View<double* [6][6]>::const_type qp_Muu;
Kokkos::View<double* [6][6]>::const_type qp_Guu;

Kokkos::View<double* [6][6]> inertia_matrix_terms;

KOKKOS_FUNCTION
void operator()(size_t i_elem) const {
for (auto i_dof = 0U; i_dof < 6U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 6U; ++j_dof) {
inertia_matrix_terms(i_elem, i_dof, j_dof) =
beta_prime * qp_Muu(i_elem, i_dof, j_dof) +
gamma_prime * qp_Guu(i_elem, i_dof, j_dof);
}
}
}
};

} // namespace masses

inline void AssembleInertiaMatrixMasses(
const Masses& masses, double beta_prime, double gamma_prime
) {
auto region = Kokkos::Profiling::ScopedRegion("Assemble Masses Inertia");
Kokkos::parallel_for(
"AssembleMassesInertia", masses.num_elems,
KOKKOS_LAMBDA(const int i_elem) {
// Add inertial terms to global inertia matrix
// Combines mass matrix (Muu) and gyroscopic terms (Guu)
for (auto i_dof = 0U; i_dof < 6U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 6U; ++j_dof) {
masses.inertia_matrix_terms(i_elem, i_dof, j_dof) =
beta_prime * masses.qp_Muu(i_elem, i_dof, j_dof) +
gamma_prime * masses.qp_Guu(i_elem, i_dof, j_dof);
}
}
}
masses::AssembleMassesInertia{
beta_prime, gamma_prime, masses.qp_Muu, masses.qp_Guu, masses.inertia_matrix_terms}
);
}

Expand Down
26 changes: 19 additions & 7 deletions src/step/assemble_residual_vector_masses.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,29 @@

namespace openturbine {

namespace masses {

struct AssembleMassesResidual {
Kokkos::View<double* [6]>::const_type qp_Fi;
Kokkos::View<double* [6]>::const_type qp_Fg;

Kokkos::View<double* [6]> residual_vector_terms;

KOKKOS_FUNCTION
void operator()(size_t i_elem) const {
for (auto i_dof = 0U; i_dof < 6U; ++i_dof) {
residual_vector_terms(i_elem, i_dof) = qp_Fi(i_elem, i_dof) - qp_Fg(i_elem, i_dof);
}
}
};

} // namespace masses

inline void AssembleResidualVectorMasses(const Masses& masses) {
auto region = Kokkos::Profiling::ScopedRegion("Assemble Masses Residual");
Kokkos::parallel_for(
"AssembleMassesResidual", masses.num_elems,
KOKKOS_LAMBDA(const int i_elem) {
// Add inertial (Fi) to and subtract gravity (Fg) forces from residual
for (auto i_dof = 0U; i_dof < 6U; ++i_dof) {
masses.residual_vector_terms(i_elem, i_dof) =
masses.qp_Fi(i_elem, i_dof) - masses.qp_Fg(i_elem, i_dof);
}
}
masses::AssembleMassesResidual{masses.qp_Fi, masses.qp_Fg, masses.residual_vector_terms}
);
}

Expand Down
25 changes: 18 additions & 7 deletions src/step/assemble_residual_vector_springs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,28 @@

namespace openturbine {

namespace springs {

struct AssembleSpringsResidual {
Kokkos::View<double* [3]>::const_type f;
Kokkos::View<double* [2][3]> residual_vector_terms;

KOKKOS_FUNCTION
void operator()(size_t i_elem) const {
// Apply forces with opposite signs to each node
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
residual_vector_terms(i_elem, 0, i_dof) = f(i_elem, i_dof);
residual_vector_terms(i_elem, 1, i_dof) = -f(i_elem, i_dof);
}
}
};

} // namespace springs
inline void AssembleResidualVectorSprings(const Springs& springs) {
auto region = Kokkos::Profiling::ScopedRegion("Assemble Springs Residual");
Kokkos::parallel_for(
"AssembleSpringsResidual", springs.num_elems,
KOKKOS_LAMBDA(const int i_elem) {
// Apply forces with opposite signs to each node
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
springs.residual_vector_terms(i_elem, 0, i_dof) = springs.f(i_elem, i_dof);
springs.residual_vector_terms(i_elem, 1, i_dof) = -springs.f(i_elem, i_dof);
}
}
springs::AssembleSpringsResidual{springs.f, springs.residual_vector_terms}
);
}

Expand Down
28 changes: 19 additions & 9 deletions src/step/assemble_stiffness_matrix_masses.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,29 @@

namespace openturbine {

namespace masses {

struct AssembleMassesStiffness {
Kokkos::View<double* [6][6]>::const_type qp_Kuu;

Kokkos::View<double* [6][6]> stiffness_matrix_terms;

KOKKOS_FUNCTION
void operator()(size_t i_elem) const {
for (auto i_dof = 0U; i_dof < 6U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 6U; ++j_dof) {
stiffness_matrix_terms(i_elem, i_dof, j_dof) = qp_Kuu(i_elem, i_dof, j_dof);
}
}
}
};
} // namespace masses

inline void AssembleStiffnessMatrixMasses(const Masses& masses) {
auto region = Kokkos::Profiling::ScopedRegion("Assemble Masses Stiffness");
Kokkos::parallel_for(
"AssembleMassesStiffness", masses.num_elems,
KOKKOS_LAMBDA(const int i_elem) {
// Add stiffness terms to global stiffness matrix
for (auto i_dof = 0U; i_dof < 6U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 6U; ++j_dof) {
masses.stiffness_matrix_terms(i_elem, i_dof, j_dof) =
masses.qp_Kuu(i_elem, i_dof, j_dof);
}
}
}
masses::AssembleMassesStiffness{masses.qp_Kuu, masses.stiffness_matrix_terms}
);
}

Expand Down
58 changes: 33 additions & 25 deletions src/step/assemble_stiffness_matrix_springs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,44 @@

namespace openturbine {

inline void AssembleStiffnessMatrixSprings(const Springs& springs) {
auto region = Kokkos::Profiling::ScopedRegion("Assemble Springs Stiffness");
Kokkos::parallel_for(
"AssembleSpringsStiffness", springs.num_elems,
KOKKOS_LAMBDA(const int i_elem) {
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
springs.stiffness_matrix_terms(i_elem, 0, 0, i_dof, j_dof) =
springs.a(i_elem, i_dof, j_dof);
}
namespace springs {

struct AssembleSpringsStiffness {
Kokkos::View<double* [3][3]>::const_type a;
Kokkos::View<double* [2][2][3][3]> stiffness_matrix_terms;

KOKKOS_FUNCTION
void operator()(size_t i_elem) const {
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
stiffness_matrix_terms(i_elem, 0, 0, i_dof, j_dof) = a(i_elem, i_dof, j_dof);
}
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
springs.stiffness_matrix_terms(i_elem, 0, 1, i_dof, j_dof) =
-springs.a(i_elem, i_dof, j_dof);
}
}
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
stiffness_matrix_terms(i_elem, 0, 1, i_dof, j_dof) = -a(i_elem, i_dof, j_dof);
}
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
springs.stiffness_matrix_terms(i_elem, 1, 0, i_dof, j_dof) =
-springs.a(i_elem, i_dof, j_dof);
}
}
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
stiffness_matrix_terms(i_elem, 1, 0, i_dof, j_dof) = -a(i_elem, i_dof, j_dof);
}
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
springs.stiffness_matrix_terms(i_elem, 1, 1, i_dof, j_dof) =
springs.a(i_elem, i_dof, j_dof);
}
}
for (auto i_dof = 0U; i_dof < 3U; ++i_dof) {
for (auto j_dof = 0U; j_dof < 3U; ++j_dof) {
stiffness_matrix_terms(i_elem, 1, 1, i_dof, j_dof) = a(i_elem, i_dof, j_dof);
}
}
}
};

} // namespace springs

inline void AssembleStiffnessMatrixSprings(const Springs& springs) {
auto region = Kokkos::Profiling::ScopedRegion("Assemble Springs Stiffness");
Kokkos::parallel_for(
"AssembleSpringsStiffness", springs.num_elems,
springs::AssembleSpringsStiffness{springs.a, springs.stiffness_matrix_terms}
);
}

Expand Down
112 changes: 21 additions & 91 deletions src/step/update_system_variables_masses.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,119 +10,49 @@
#include "state/state.hpp"
#include "step_parameters.hpp"
#include "system/masses/calculate_QP_position.hpp"
#include "system/masses/calculate_RR0.hpp"
#include "system/masses/calculate_gravity_force.hpp"
#include "system/masses/calculate_gyroscopic_matrix.hpp"
#include "system/masses/calculate_inertia_stiffness_matrix.hpp"
#include "system/masses/calculate_inertial_force.hpp"
#include "system/masses/calculate_mass_matrix_components.hpp"
#include "system/masses/rotate_section_matrix.hpp"
#include "system/masses/calculate_quadrature_point_values.hpp"
#include "system/masses/copy_to_quadrature_points.hpp"
#include "system/masses/update_node_state.hpp"

namespace openturbine {

inline void UpdateSystemVariablesMasses(
const StepParameters& parameters, const Masses& masses, State& state
) {
auto region = Kokkos::Profiling::ScopedRegion("Update System Variables Masses");

// Update the node states for masses to get the current position/rotation
Kokkos::parallel_for(
masses.num_elems,
"masses::UpdateNodeState", masses.num_elems,
masses::UpdateNodeState{
masses.state_indices, masses.node_u, masses.node_u_dot, masses.node_u_ddot, state.q,
state.v, state.vd
}
state.v, state.vd}
);

// Calculate some ancillary values (angular velocity - omega, angular acceleration - omega_dot,
// linear acceleration - u_ddot etc.) that will be required by system kernels
Kokkos::deep_copy(
masses.qp_x0, Kokkos::subview(masses.node_x0, Kokkos::ALL, Kokkos::make_pair(0, 3))
);
Kokkos::deep_copy(
masses.qp_u, Kokkos::subview(masses.node_u, Kokkos::ALL, Kokkos::make_pair(0, 3))
);
Kokkos::deep_copy(
masses.qp_r0, Kokkos::subview(masses.node_x0, Kokkos::ALL, Kokkos::make_pair(3, 7))
);
Kokkos::deep_copy(
masses.qp_u_ddot, Kokkos::subview(masses.node_u_ddot, Kokkos::ALL, Kokkos::make_pair(0, 3))
);
Kokkos::deep_copy(
masses.qp_r, Kokkos::subview(masses.node_u, Kokkos::ALL, Kokkos::make_pair(3, 7))
);
Kokkos::deep_copy(
masses.qp_omega, Kokkos::subview(masses.node_u_dot, Kokkos::ALL, Kokkos::make_pair(3, 6))
);
Kokkos::deep_copy(
masses.qp_omega_dot,
Kokkos::subview(masses.node_u_ddot, Kokkos::ALL, Kokkos::make_pair(3, 6))
Kokkos::parallel_for(
"masses::CopyToQuadraturePoints", masses.num_elems,
masses::CopyToQuadraturePoints{
masses.node_x0, masses.node_u, masses.node_u_dot, masses.node_u_ddot, masses.qp_x0,
masses.qp_r0, masses.qp_u, masses.qp_r, masses.qp_u_ddot, masses.qp_omega,
masses.qp_omega_dot}
);

Kokkos::parallel_for(
masses.num_elems,
KOKKOS_LAMBDA(size_t i_elem) {
masses::CalculateQPPosition{i_elem, masses.qp_x0, masses.qp_u,
masses.qp_r0, masses.qp_r, masses.qp_x}();
}
"masses::CalculateQPPosition", masses.num_elems,
masses::CalculateQPPosition{
masses.qp_x0, masses.qp_u, masses.qp_r0, masses.qp_r, masses.qp_x}
);

// Calculate system variables by executing the system kernels and perform assembly
Kokkos::parallel_for(
masses.num_elems,
KOKKOS_LAMBDA(const size_t i_elem) {
// Calculate the global rotation matrix
masses::CalculateRR0{i_elem, masses.qp_x, masses.qp_RR0}();

// Transform mass matrix from material -> inertial frame
masses::RotateSectionMatrix{i_elem, masses.qp_RR0, masses.qp_Mstar, masses.qp_Muu}();

// Calculate mass matrix components i.e. eta, rho, and eta_tilde
masses::CalculateMassMatrixComponents{
i_elem, masses.qp_Muu, masses.qp_eta, masses.qp_rho, masses.qp_eta_tilde
}();

// Calculate gravity forces
masses::CalculateGravityForce{
i_elem, masses.gravity, masses.qp_Muu, masses.qp_eta_tilde, masses.qp_Fg
}();

// Calculate inertial forces i.e. forces due to linear + angular
// acceleration
masses::CalculateInertialForces{
i_elem,
masses.qp_Muu,
masses.qp_u_ddot,
masses.qp_omega,
masses.qp_omega_dot,
masses.qp_eta_tilde,
masses.qp_omega_tilde,
masses.qp_omega_dot_tilde,
masses.qp_rho,
masses.qp_eta,
masses.qp_Fi
}();

// Calculate the gyroscopic/inertial damping matrix
masses::CalculateGyroscopicMatrix{i_elem, masses.qp_Muu,
masses.qp_omega, masses.qp_omega_tilde,
masses.qp_rho, masses.qp_eta,
masses.qp_Guu}();

// Calculate the inertial stiffness matrix i.e. contributions from mass distribution and
// rotational dynamics
masses::CalculateInertiaStiffnessMatrix{
i_elem,
masses.qp_Muu,
masses.qp_u_ddot,
masses.qp_omega,
masses.qp_omega_dot,
masses.qp_omega_tilde,
masses.qp_omega_dot_tilde,
masses.qp_rho,
masses.qp_eta,
masses.qp_Kuu
}();
}
"masses::CalculateQuadraturePointValues", masses.num_elems,
masses::CalculateQuadraturePointValues{
masses.gravity, masses.qp_Mstar, masses.qp_x, masses.qp_u_ddot, masses.qp_omega,
masses.qp_omega_dot, masses.qp_eta, masses.qp_rho, masses.qp_eta_tilde,
masses.qp_omega_tilde, masses.qp_omega_dot_tilde, masses.qp_Fi, masses.qp_Fg,
masses.qp_RR0, masses.qp_Muu, masses.qp_Guu, masses.qp_Kuu}
);

AssembleResidualVectorMasses(masses);
Expand Down
Loading

0 comments on commit acfbeb7

Please sign in to comment.