diff --git a/common/cuda_hip/solver/batch_bicgstab_launch.hpp b/common/cuda_hip/solver/batch_bicgstab_launch.hpp index 193d78a0c8a..1a787a860cd 100644 --- a/common/cuda_hip/solver/batch_bicgstab_launch.hpp +++ b/common/cuda_hip/solver/batch_bicgstab_launch.hpp @@ -37,11 +37,11 @@ void launch_apply_kernel( #define GKO_DECLARE_BATCH_BICGSTAB_LAUNCH(_vtype, _n_shared, _prec_shared, \ mat_t, log_t, pre_t, stop_t) \ - void launch_apply_kernel, _n_shared, _prec_shared, \ + void launch_apply_kernel<_vtype, _n_shared, _prec_shared, \ stop_t>>( \ std::shared_ptr exec, \ const gko::kernels::batch_bicgstab::storage_config& sconf, \ - const settings>>& settings, \ + const settings>& settings, \ log_t>>& logger, \ pre_t>& prec, \ const mat_t>& mat, \ diff --git a/common/cuda_hip/solver/batch_cg_launch.hpp b/common/cuda_hip/solver/batch_cg_launch.hpp index 03b07630942..d9e9159f0d1 100644 --- a/common/cuda_hip/solver/batch_cg_launch.hpp +++ b/common/cuda_hip/solver/batch_cg_launch.hpp @@ -35,19 +35,19 @@ void launch_apply_kernel( device_type* const __restrict__ workspace_data, const int& block_size, const size_t& shared_size); -#define GKO_DECLARE_BATCH_CG_LAUNCH(_vtype, _n_shared, _prec_shared, mat_t, \ - log_t, pre_t, stop_t) \ - void launch_apply_kernel, _n_shared, _prec_shared, \ - stop_t>>( \ - std::shared_ptr exec, \ - const gko::kernels::batch_cg::storage_config& sconf, \ - const settings>& settings, \ - log_t>>>& logger, \ - pre_t>& prec, \ - const mat_t>& mat, \ - const device_type<_vtype>* const __restrict__ b_values, \ - device_type<_vtype>* const __restrict__ x_values, \ - device_type<_vtype>* const __restrict__ workspace_data, \ +#define GKO_DECLARE_BATCH_CG_LAUNCH(_vtype, _n_shared, _prec_shared, mat_t, \ + log_t, pre_t, stop_t) \ + void launch_apply_kernel<_vtype, _n_shared, _prec_shared, \ + stop_t>>( \ + std::shared_ptr exec, \ + const gko::kernels::batch_cg::storage_config& sconf, \ + const settings>& settings, \ + log_t>>& logger, \ + pre_t>& prec, \ + const mat_t>& mat, \ + const device_type<_vtype>* const __restrict__ b_values, \ + device_type<_vtype>* const __restrict__ x_values, \ + device_type<_vtype>* const __restrict__ workspace_data, \ const int& block_size, const size_t& shared_size) #define GKO_INSTANTIATE_BATCH_CG_LAUNCH(...) \ diff --git a/cuda/solver/batch_bicgstab_kernels.cu b/cuda/solver/batch_bicgstab_kernels.cu index b8c1ab20b01..f6bede16863 100644 --- a/cuda/solver/batch_bicgstab_kernels.cu +++ b/cuda/solver/batch_bicgstab_kernels.cu @@ -71,58 +71,58 @@ public: // Template parameters launch_apply_kernel if (sconf.prec_shared) { - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); } else { switch (sconf.n_shared) { case 0: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 1: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 2: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 3: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 4: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 5: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 6: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 7: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 8: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 9: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; diff --git a/cuda/solver/batch_bicgstab_launch.cuh b/cuda/solver/batch_bicgstab_launch.cuh index 737f2a923b0..cfe7931dde3 100644 --- a/cuda/solver/batch_bicgstab_launch.cuh +++ b/cuda/solver/batch_bicgstab_launch.cuh @@ -31,16 +31,16 @@ template exec, const int num_rows); -#define GKO_DECLARE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK_( \ - _vtype, mat_t, log_t, pre_t, stop_t) \ - int get_num_threads_per_block< \ - stop_t>, pre_t>, \ - log_t>, mat_t>, \ - cuda_type<_vtype>>(std::shared_ptr exec, \ - const int num_rows) +#define GKO_DECLARE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK_( \ + _vtype, mat_t, log_t, pre_t, stop_t) \ + int get_num_threads_per_block< \ + stop_t>, pre_t>, \ + log_t>>, \ + mat_t>, cuda_type<_vtype>>( \ + std::shared_ptr exec, const int num_rows) #define GKO_INSTANTIATE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK_(...) \ - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS( \ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS_WITH_HALF( \ GKO_DECLARE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK_, __VA_ARGS__) #define GKO_INSTANTIATE_BATCH_BICGSTAB_GET_NUM_THREADS_PER_BLOCK \ @@ -52,15 +52,16 @@ template int get_max_dynamic_shared_memory(std::shared_ptr exec); -#define GKO_DECLARE_BATCH_BICGSTAB_GET_MAX_DYNAMIC_SHARED_MEMORY_( \ - _vtype, mat_t, log_t, pre_t, stop_t) \ - int get_max_dynamic_shared_memory< \ - stop_t>, pre_t>, \ - log_t>, mat_t>, \ - cuda_type<_vtype>>(std::shared_ptr exec) +#define GKO_DECLARE_BATCH_BICGSTAB_GET_MAX_DYNAMIC_SHARED_MEMORY_( \ + _vtype, mat_t, log_t, pre_t, stop_t) \ + int get_max_dynamic_shared_memory< \ + stop_t>, pre_t>, \ + log_t>>, \ + mat_t>, cuda_type<_vtype>>( \ + std::shared_ptr exec) #define GKO_INSTANTIATE_BATCH_BICGSTAB_GET_MAX_DYNAMIC_SHARED_MEMORY_(...) \ - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS( \ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS_WITH_HALF( \ GKO_DECLARE_BATCH_BICGSTAB_GET_MAX_DYNAMIC_SHARED_MEMORY_, \ __VA_ARGS__) diff --git a/cuda/solver/batch_cg_kernels.cu b/cuda/solver/batch_cg_kernels.cu index 88f1c65c00e..5706970df5a 100644 --- a/cuda/solver/batch_cg_kernels.cu +++ b/cuda/solver/batch_cg_kernels.cu @@ -72,38 +72,38 @@ public: // Template parameters launch_apply_kernel if (sconf.prec_shared) { - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); } else { switch (sconf.n_shared) { case 0: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 1: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 2: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 3: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 4: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 5: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; diff --git a/cuda/solver/batch_cg_launch.cuh b/cuda/solver/batch_cg_launch.cuh index e803e15fe80..af575b81651 100644 --- a/cuda/solver/batch_cg_launch.cuh +++ b/cuda/solver/batch_cg_launch.cuh @@ -40,7 +40,7 @@ int get_num_threads_per_block(std::shared_ptr exec, std::shared_ptr exec, const int num_rows) #define GKO_INSTANTIATE_BATCH_CG_GET_NUM_THREADS_PER_BLOCK_(...) \ - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS( \ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS_WITH_HALF( \ GKO_DECLARE_BATCH_CG_GET_NUM_THREADS_PER_BLOCK_, __VA_ARGS__) #define GKO_INSTANTIATE_BATCH_CG_GET_NUM_THREADS_PER_BLOCK \ @@ -51,16 +51,17 @@ template int get_max_dynamic_shared_memory(std::shared_ptr exec); -#define GKO_DECLARE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY_( \ - _vtype, mat_t, log_t, pre_t, stop_t) \ - int get_max_dynamic_shared_memory< \ - stop_t>, pre_t>, \ - log_t>, mat_t>, \ - cuda_type<_vtype>>(std::shared_ptr exec) +#define GKO_DECLARE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY_( \ + _vtype, mat_t, log_t, pre_t, stop_t) \ + int get_max_dynamic_shared_memory< \ + stop_t>, pre_t>, \ + log_t>>, \ + mat_t>, cuda_type<_vtype>>( \ + std::shared_ptr exec) #define GKO_INSTANTIATE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY_(...) \ - GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS( \ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_VARGS_WITH_HALF( \ GKO_DECLARE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY_, __VA_ARGS__) #define GKO_INSTANTIATE_BATCH_CG_GET_MAX_DYNAMIC_SHARED_MEMORY \ diff --git a/hip/solver/batch_bicgstab_kernels.hip.cpp b/hip/solver/batch_bicgstab_kernels.hip.cpp index 332d3dd8303..8e6be4e9061 100644 --- a/hip/solver/batch_bicgstab_kernels.hip.cpp +++ b/hip/solver/batch_bicgstab_kernels.hip.cpp @@ -95,58 +95,58 @@ class kernel_caller { // Template parameters launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); } else { switch (sconf.n_shared) { case 0: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 1: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 2: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 3: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 4: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 5: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 6: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 7: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 8: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 9: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; diff --git a/hip/solver/batch_cg_kernels.hip.cpp b/hip/solver/batch_cg_kernels.hip.cpp index 6001adb94d3..5bd7500819c 100644 --- a/hip/solver/batch_cg_kernels.hip.cpp +++ b/hip/solver/batch_cg_kernels.hip.cpp @@ -97,38 +97,38 @@ class kernel_caller { // Template parameters launch_apply_kernel if (sconf.prec_shared) { - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); } else { switch (sconf.n_shared) { case 0: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 1: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 2: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 3: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 4: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break; case 5: - launch_apply_kernel( + launch_apply_kernel( exec_, sconf, settings_, logger, prec, mat, b.values, x.values, workspace_data, block_size, shared_size); break;