Skip to content

Commit

Permalink
cuda/hip batch changes
Browse files Browse the repository at this point in the history
  • Loading branch information
yhmtsai committed Nov 28, 2024
1 parent c1feff4 commit 3fa5f5d
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 70 deletions.
4 changes: 2 additions & 2 deletions common/cuda_hip/solver/batch_bicgstab_launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ void launch_apply_kernel(

#define GKO_DECLARE_BATCH_BICGSTAB_LAUNCH(_vtype, _n_shared, _prec_shared, \
mat_t, log_t, pre_t, stop_t) \
void launch_apply_kernel<device_type<_vtype>, _n_shared, _prec_shared, \
void launch_apply_kernel<_vtype, _n_shared, _prec_shared, \
stop_t<device_type<_vtype>>>( \
std::shared_ptr<const DefaultExecutor> exec, \
const gko::kernels::batch_bicgstab::storage_config& sconf, \
const settings<remove_complex<device_type<_vtype>>>& settings, \
const settings<remove_complex<_vtype>>& settings, \
log_t<gko::remove_complex<device_type<_vtype>>>& logger, \
pre_t<device_type<_vtype>>& prec, \
const mat_t<const device_type<_vtype>>& mat, \
Expand Down
26 changes: 13 additions & 13 deletions common/cuda_hip/solver/batch_cg_launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,19 @@ void launch_apply_kernel(
device_type<ValueType>* const __restrict__ workspace_data,
const int& block_size, const size_t& shared_size);

#define GKO_DECLARE_BATCH_CG_LAUNCH(_vtype, _n_shared, _prec_shared, mat_t, \
log_t, pre_t, stop_t) \
void launch_apply_kernel<device_type<_vtype>, _n_shared, _prec_shared, \
stop_t<device_type<_vtype>>>( \
std::shared_ptr<const DefaultExecutor> exec, \
const gko::kernels::batch_cg::storage_config& sconf, \
const settings<remove_complex<_vtype>>& settings, \
log_t<device_type<gko::remove_complex<device_type<_vtype>>>>& logger, \
pre_t<device_type<_vtype>>& prec, \
const mat_t<const device_type<_vtype>>& mat, \
const device_type<_vtype>* const __restrict__ b_values, \
device_type<_vtype>* const __restrict__ x_values, \
device_type<_vtype>* const __restrict__ workspace_data, \
#define GKO_DECLARE_BATCH_CG_LAUNCH(_vtype, _n_shared, _prec_shared, mat_t, \
log_t, pre_t, stop_t) \
void launch_apply_kernel<_vtype, _n_shared, _prec_shared, \
stop_t<device_type<_vtype>>>( \
std::shared_ptr<const DefaultExecutor> exec, \
const gko::kernels::batch_cg::storage_config& sconf, \
const settings<remove_complex<_vtype>>& settings, \
log_t<gko::remove_complex<device_type<_vtype>>>& logger, \
pre_t<device_type<_vtype>>& prec, \
const mat_t<const device_type<_vtype>>& mat, \
const device_type<_vtype>* const __restrict__ b_values, \
device_type<_vtype>* const __restrict__ x_values, \
device_type<_vtype>* const __restrict__ workspace_data, \
const int& block_size, const size_t& shared_size)

#define GKO_INSTANTIATE_BATCH_CG_LAUNCH_0_FALSE \
Expand Down
22 changes: 11 additions & 11 deletions cuda/solver/batch_bicgstab_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,58 +72,58 @@ public:
// Template parameters launch_apply_kernel<ValueType, n_shared,
// prec_shared, StopType>
if (sconf.prec_shared) {
launch_apply_kernel<cuda_value_type, 9, true, StopType>(
launch_apply_kernel<ValueType, 9, true, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<cuda_value_type, 0, false, StopType>(
launch_apply_kernel<ValueType, 0, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<cuda_value_type, 1, false, StopType>(
launch_apply_kernel<ValueType, 1, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<cuda_value_type, 2, false, StopType>(
launch_apply_kernel<ValueType, 2, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<cuda_value_type, 3, false, StopType>(
launch_apply_kernel<ValueType, 3, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<cuda_value_type, 4, false, StopType>(
launch_apply_kernel<ValueType, 4, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<cuda_value_type, 5, false, StopType>(
launch_apply_kernel<ValueType, 5, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 6:
launch_apply_kernel<cuda_value_type, 6, false, StopType>(
launch_apply_kernel<ValueType, 6, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 7:
launch_apply_kernel<cuda_value_type, 7, false, StopType>(
launch_apply_kernel<ValueType, 7, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 8:
launch_apply_kernel<cuda_value_type, 8, false, StopType>(
launch_apply_kernel<ValueType, 8, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 9:
launch_apply_kernel<cuda_value_type, 9, false, StopType>(
launch_apply_kernel<ValueType, 9, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
Expand Down
27 changes: 14 additions & 13 deletions cuda/solver/batch_bicgstab_launch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ template <typename StopType, typename PrecType, typename LogType,
int get_num_threads_per_block(std::shared_ptr<const DefaultExecutor> exec,
const int num_rows);

#define GKO_DECLARE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK( \
_vtype, mat_t, log_t, pre_t, stop_t) \
int get_num_threads_per_block< \
stop_t<cuda_type<_vtype>>, pre_t<cuda_type<_vtype>>, \
log_t<gko::remove_complex<_vtype>>, mat_t<const cuda_type<_vtype>>, \
cuda_type<_vtype>>(std::shared_ptr<const DefaultExecutor> exec, \
const int num_rows)
// Expands to the signature of `get_num_threads_per_block` with explicit
// template arguments, in the order: stopping criterion, preconditioner,
// logger, batch matrix, value type. Each argument is built from
// `cuda_type<_vtype>`; the logger takes
// `gko::remove_complex<cuda_type<_vtype>>`. The expansion carries no leading
// `template` keyword and no trailing semicolon.
#define GKO_DECLARE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK( \
_vtype, mat_t, log_t, pre_t, stop_t) \
int get_num_threads_per_block< \
stop_t<cuda_type<_vtype>>, pre_t<cuda_type<_vtype>>, \
log_t<gko::remove_complex<cuda_type<_vtype>>>, \
mat_t<const cuda_type<_vtype>>, cuda_type<_vtype>>( \
std::shared_ptr<const DefaultExecutor> exec, const int num_rows)

// Passes the declaration macro above to GKO_BATCH_INSTANTIATE (defined outside
// this view); presumably it expands it once per supported type combination --
// confirm against that macro's definition.
#define GKO_INSTANTIATE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK \
GKO_BATCH_INSTANTIATE(GKO_DECLARE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK)
Expand All @@ -47,12 +47,13 @@ template <typename StopType, typename PrecType, typename LogType,
typename BatchMatrixType, typename ValueType>
int get_max_dynamic_shared_memory(std::shared_ptr<const DefaultExecutor> exec);

#define GKO_DECLARE_BATCH_BICGSTAB_GET_MAX_DYNAMIC_SHARED_MEMORY( \
_vtype, mat_t, log_t, pre_t, stop_t) \
int get_max_dynamic_shared_memory< \
stop_t<cuda_type<_vtype>>, pre_t<cuda_type<_vtype>>, \
log_t<gko::remove_complex<_vtype>>, mat_t<const cuda_type<_vtype>>, \
cuda_type<_vtype>>(std::shared_ptr<const DefaultExecutor> exec)
// Expands to the signature of `get_max_dynamic_shared_memory` with explicit
// template arguments, in the order: stopping criterion, preconditioner,
// logger, batch matrix, value type. Each argument is built from
// `cuda_type<_vtype>`; the logger takes
// `gko::remove_complex<cuda_type<_vtype>>`. The expansion carries no leading
// `template` keyword and no trailing semicolon.
#define GKO_DECLARE_BATCH_BICGSTAB_GET_MAX_DYNAMIC_SHARED_MEMORY( \
_vtype, mat_t, log_t, pre_t, stop_t) \
int get_max_dynamic_shared_memory< \
stop_t<cuda_type<_vtype>>, pre_t<cuda_type<_vtype>>, \
log_t<gko::remove_complex<cuda_type<_vtype>>>, \
mat_t<const cuda_type<_vtype>>, cuda_type<_vtype>>( \
std::shared_ptr<const DefaultExecutor> exec)

#define GKO_INSTANTIATE_BATCH_BICGSTAB_GET_MAX_DYNAMIC_SHARED_MEMORY \
GKO_BATCH_INSTANTIATE( \
Expand Down
14 changes: 7 additions & 7 deletions cuda/solver/batch_cg_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,38 +73,38 @@ public:
// Template parameters launch_apply_kernel<ValueType, n_shared,
// prec_shared, StopType>
if (sconf.prec_shared) {
launch_apply_kernel<cuda_value_type, 5, true, StopType>(
launch_apply_kernel<ValueType, 5, true, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<cuda_value_type, 0, false, StopType>(
launch_apply_kernel<ValueType, 0, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<cuda_value_type, 1, false, StopType>(
launch_apply_kernel<ValueType, 1, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<cuda_value_type, 2, false, StopType>(
launch_apply_kernel<ValueType, 2, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<cuda_value_type, 3, false, StopType>(
launch_apply_kernel<ValueType, 3, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<cuda_value_type, 4, false, StopType>(
launch_apply_kernel<ValueType, 4, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<cuda_value_type, 5, false, StopType>(
launch_apply_kernel<ValueType, 5, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
Expand Down
13 changes: 7 additions & 6 deletions cuda/solver/batch_cg_launch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,13 @@ template <typename StopType, typename PrecType, typename LogType,
typename BatchMatrixType, typename ValueType>
int get_max_dynamic_shared_memory(std::shared_ptr<const DefaultExecutor> exec);

#define GKO_DECLARE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY( \
_vtype, mat_t, log_t, pre_t, stop_t) \
int get_max_dynamic_shared_memory< \
stop_t<cuda_type<_vtype>>, pre_t<cuda_type<_vtype>>, \
log_t<gko::remove_complex<_vtype>>, mat_t<const cuda_type<_vtype>>, \
cuda_type<_vtype>>(std::shared_ptr<const DefaultExecutor> exec)
// Expands to the signature of `get_max_dynamic_shared_memory` with explicit
// template arguments, in the order: stopping criterion, preconditioner,
// logger, batch matrix, value type. Each argument is built from
// `cuda_type<_vtype>`; the logger takes
// `gko::remove_complex<cuda_type<_vtype>>`. The expansion carries no leading
// `template` keyword and no trailing semicolon.
#define GKO_DECLARE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY( \
_vtype, mat_t, log_t, pre_t, stop_t) \
int get_max_dynamic_shared_memory< \
stop_t<cuda_type<_vtype>>, pre_t<cuda_type<_vtype>>, \
log_t<gko::remove_complex<cuda_type<_vtype>>>, \
mat_t<const cuda_type<_vtype>>, cuda_type<_vtype>>( \
std::shared_ptr<const DefaultExecutor> exec)

// Passes the declaration macro above to GKO_BATCH_INSTANTIATE (defined outside
// this view); presumably it expands it once per supported type combination --
// confirm against that macro's definition.
#define GKO_INSTANTIATE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY \
GKO_BATCH_INSTANTIATE(GKO_DECLARE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY)
Expand Down
22 changes: 11 additions & 11 deletions hip/solver/batch_bicgstab_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,58 +96,58 @@ class kernel_caller {
// Template parameters launch_apply_kernel<ValueType, n_shared,
// prec_shared, StopType>
if (sconf.prec_shared) {
launch_apply_kernel<hip_value_type, 9, true, StopType>(
launch_apply_kernel<ValueType, 9, true, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<hip_value_type, 0, false, StopType>(
launch_apply_kernel<ValueType, 0, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<hip_value_type, 1, false, StopType>(
launch_apply_kernel<ValueType, 1, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<hip_value_type, 2, false, StopType>(
launch_apply_kernel<ValueType, 2, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<hip_value_type, 3, false, StopType>(
launch_apply_kernel<ValueType, 3, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<hip_value_type, 4, false, StopType>(
launch_apply_kernel<ValueType, 4, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<hip_value_type, 5, false, StopType>(
launch_apply_kernel<ValueType, 5, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 6:
launch_apply_kernel<hip_value_type, 6, false, StopType>(
launch_apply_kernel<ValueType, 6, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 7:
launch_apply_kernel<hip_value_type, 7, false, StopType>(
launch_apply_kernel<ValueType, 7, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 8:
launch_apply_kernel<hip_value_type, 8, false, StopType>(
launch_apply_kernel<ValueType, 8, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 9:
launch_apply_kernel<hip_value_type, 9, false, StopType>(
launch_apply_kernel<ValueType, 9, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
Expand Down
14 changes: 7 additions & 7 deletions hip/solver/batch_cg_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,38 +98,38 @@ class kernel_caller {
// Template parameters launch_apply_kernel<ValueType, n_shared,
// prec_shared, StopType>
if (sconf.prec_shared) {
launch_apply_kernel<hip_value_type, 5, true, StopType>(
launch_apply_kernel<ValueType, 5, true, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<hip_value_type, 0, false, StopType>(
launch_apply_kernel<ValueType, 0, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<hip_value_type, 1, false, StopType>(
launch_apply_kernel<ValueType, 1, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<hip_value_type, 2, false, StopType>(
launch_apply_kernel<ValueType, 2, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<hip_value_type, 3, false, StopType>(
launch_apply_kernel<ValueType, 3, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<hip_value_type, 4, false, StopType>(
launch_apply_kernel<ValueType, 4, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<hip_value_type, 5, false, StopType>(
launch_apply_kernel<ValueType, 5, false, StopType>(
exec_, sconf, settings_, logger, prec, mat, b.values,
x.values, workspace_data, block_size, shared_size);
break;
Expand Down

0 comments on commit 3fa5f5d

Please sign in to comment.