-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathKokkosFFT_Cuda_plans.hpp
214 lines (182 loc) · 9.64 KB
/
KokkosFFT_Cuda_plans.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
// SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file
//
// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
#ifndef KOKKOSFFT_CUDA_PLANS_HPP
#define KOKKOSFFT_CUDA_PLANS_HPP
#include <numeric>
#include "KokkosFFT_Cuda_types.hpp"
#include "KokkosFFT_Extents.hpp"
#include "KokkosFFT_traits.hpp"
#include "KokkosFFT_asserts.hpp"
namespace KokkosFFT {
namespace Impl {
/// \brief Creates a cuFFT plan for a 1D transform on a rank-1 View.
///
/// The handle is allocated with cufftCreate, bound to the CUDA stream of
/// \p exec_space, and then populated in place with cufftMakePlan1d.
/// cufftPlan1d must not be used on a handle obtained from cufftCreate: it
/// allocates a brand-new handle, which would leak the created one and drop
/// the stream that was attached to it.
///
/// \param exec_space [in] Execution space whose CUDA stream the plan uses
/// \param plan [out] Receives the newly created cuFFT plan handle
/// \param in [in] Input View, forwarded to get_extents
/// \param out [in] Output View, forwarded to get_extents
/// \param axes [in] Axes of the transform, forwarded to get_extents
/// \param s [in] Shape argument, forwarded to get_extents
/// \param is_inplace [in] In-place flag, forwarded to get_extents
/// \return Product of the FFT extents returned by get_extents
/// \note The buffer, info and direction arguments are not used by this
/// overload. Failures are reported through KOKKOSFFT_THROW_IF.
template <typename ExecutionSpace, typename PlanType, typename InViewType,
          typename OutViewType, typename BufferViewType, typename InfoType,
          std::enable_if_t<InViewType::rank() == 1 &&
                               std::is_same_v<ExecutionSpace, Kokkos::Cuda>,
                           std::nullptr_t> = nullptr>
auto create_plan(const ExecutionSpace& exec_space,
                 std::unique_ptr<PlanType>& plan, const InViewType& in,
                 const OutViewType& out, BufferViewType&, InfoType&,
                 Direction /*direction*/, axis_type<1> axes, shape_type<1> s,
                 bool is_inplace) {
  static_assert(
      KokkosFFT::Impl::are_operatable_views_v<ExecutionSpace, InViewType,
                                              OutViewType>,
      "create_plan: InViewType and OutViewType must have the same base "
      "floating point type (float/double), the same layout "
      "(LayoutLeft/LayoutRight), "
      "and the same rank. ExecutionSpace must be accessible to the data in "
      "InViewType and OutViewType.");
  using in_value_type  = typename InViewType::non_const_value_type;
  using out_value_type = typename OutViewType::non_const_value_type;

  plan                 = std::make_unique<PlanType>();
  cufftResult cufft_rt = cufftCreate(&(*plan));
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed");

  // Bind the stream to the handle that will actually hold the plan.
  cudaStream_t stream = exec_space.cuda_stream();
  cufft_rt            = cufftSetStream((*plan), stream);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftSetStream failed");

  auto type = KokkosFFT::Impl::transform_type<ExecutionSpace, in_value_type,
                                              out_value_type>::type();
  [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] =
      KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace);
  const int nx = fft_extents.at(0);
  int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1,
                                 std::multiplies<>());

  // Populate the existing handle instead of allocating a second one.
  std::size_t work_size = 0;
  cufft_rt = cufftMakePlan1d((*plan), nx, type, howmany, &work_size);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftMakePlan1d failed");
  return fft_size;
}
/// \brief Creates a cuFFT plan for a 2D transform on a rank-2 View.
///
/// The handle is allocated with cufftCreate, bound to the CUDA stream of
/// \p exec_space, and then populated in place with cufftMakePlan2d.
/// cufftPlan2d must not be used on a handle obtained from cufftCreate: it
/// allocates a brand-new handle, which would leak the created one and drop
/// the stream that was attached to it.
///
/// \param exec_space [in] Execution space whose CUDA stream the plan uses
/// \param plan [out] Receives the newly created cuFFT plan handle
/// \param in [in] Input View, forwarded to get_extents
/// \param out [in] Output View, forwarded to get_extents
/// \param axes [in] Axes of the transform, forwarded to get_extents
/// \param s [in] Shape argument, forwarded to get_extents
/// \param is_inplace [in] In-place flag, forwarded to get_extents
/// \return Product of the FFT extents returned by get_extents
/// \note The buffer, info and direction arguments are not used by this
/// overload. Failures are reported through KOKKOSFFT_THROW_IF.
template <typename ExecutionSpace, typename PlanType, typename InViewType,
          typename OutViewType, typename BufferViewType, typename InfoType,
          std::enable_if_t<InViewType::rank() == 2 &&
                               std::is_same_v<ExecutionSpace, Kokkos::Cuda>,
                           std::nullptr_t> = nullptr>
auto create_plan(const ExecutionSpace& exec_space,
                 std::unique_ptr<PlanType>& plan, const InViewType& in,
                 const OutViewType& out, BufferViewType&, InfoType&,
                 Direction /*direction*/, axis_type<2> axes, shape_type<2> s,
                 bool is_inplace) {
  static_assert(
      KokkosFFT::Impl::are_operatable_views_v<ExecutionSpace, InViewType,
                                              OutViewType>,
      "create_plan: InViewType and OutViewType must have the same base "
      "floating point type (float/double), the same layout "
      "(LayoutLeft/LayoutRight), "
      "and the same rank. ExecutionSpace must be accessible to the data in "
      "InViewType and OutViewType.");
  using in_value_type  = typename InViewType::non_const_value_type;
  using out_value_type = typename OutViewType::non_const_value_type;

  plan                 = std::make_unique<PlanType>();
  cufftResult cufft_rt = cufftCreate(&(*plan));
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed");

  // Bind the stream to the handle that will actually hold the plan.
  cudaStream_t stream = exec_space.cuda_stream();
  cufft_rt            = cufftSetStream((*plan), stream);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftSetStream failed");

  auto type = KokkosFFT::Impl::transform_type<ExecutionSpace, in_value_type,
                                              out_value_type>::type();
  [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] =
      KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace);
  const int nx = fft_extents.at(0), ny = fft_extents.at(1);
  int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1,
                                 std::multiplies<>());

  // Populate the existing handle instead of allocating a second one.
  std::size_t work_size = 0;
  cufft_rt = cufftMakePlan2d((*plan), nx, ny, type, &work_size);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftMakePlan2d failed");
  return fft_size;
}
/// \brief Creates a cuFFT plan for a 3D transform on a rank-3 View.
///
/// The handle is allocated with cufftCreate, bound to the CUDA stream of
/// \p exec_space, and then populated in place with cufftMakePlan3d.
/// cufftPlan3d must not be used on a handle obtained from cufftCreate: it
/// allocates a brand-new handle, which would leak the created one and drop
/// the stream that was attached to it.
///
/// \param exec_space [in] Execution space whose CUDA stream the plan uses
/// \param plan [out] Receives the newly created cuFFT plan handle
/// \param in [in] Input View, forwarded to get_extents
/// \param out [in] Output View, forwarded to get_extents
/// \param axes [in] Axes of the transform, forwarded to get_extents
/// \param s [in] Shape argument, forwarded to get_extents
/// \param is_inplace [in] In-place flag, forwarded to get_extents
/// \return Product of the FFT extents returned by get_extents
/// \note The buffer, info and direction arguments are not used by this
/// overload. Failures are reported through KOKKOSFFT_THROW_IF.
template <typename ExecutionSpace, typename PlanType, typename InViewType,
          typename OutViewType, typename BufferViewType, typename InfoType,
          std::enable_if_t<InViewType::rank() == 3 &&
                               std::is_same_v<ExecutionSpace, Kokkos::Cuda>,
                           std::nullptr_t> = nullptr>
auto create_plan(const ExecutionSpace& exec_space,
                 std::unique_ptr<PlanType>& plan, const InViewType& in,
                 const OutViewType& out, BufferViewType&, InfoType&,
                 Direction /*direction*/, axis_type<3> axes, shape_type<3> s,
                 bool is_inplace) {
  static_assert(
      KokkosFFT::Impl::are_operatable_views_v<ExecutionSpace, InViewType,
                                              OutViewType>,
      "create_plan: InViewType and OutViewType must have the same base "
      "floating point type (float/double), the same layout "
      "(LayoutLeft/LayoutRight), "
      "and the same rank. ExecutionSpace must be accessible to the data in "
      "InViewType and OutViewType.");
  using in_value_type  = typename InViewType::non_const_value_type;
  using out_value_type = typename OutViewType::non_const_value_type;

  plan                 = std::make_unique<PlanType>();
  cufftResult cufft_rt = cufftCreate(&(*plan));
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed");

  // Bind the stream to the handle that will actually hold the plan.
  cudaStream_t stream = exec_space.cuda_stream();
  cufft_rt            = cufftSetStream((*plan), stream);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftSetStream failed");

  auto type = KokkosFFT::Impl::transform_type<ExecutionSpace, in_value_type,
                                              out_value_type>::type();
  [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] =
      KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace);
  const int nx = fft_extents.at(0), ny = fft_extents.at(1),
            nz = fft_extents.at(2);
  int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1,
                                 std::multiplies<>());

  // Populate the existing handle instead of allocating a second one.
  std::size_t work_size = 0;
  cufft_rt = cufftMakePlan3d((*plan), nx, ny, nz, type, &work_size);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftMakePlan3d failed");
  return fft_size;
}
/// \brief Creates a batched cuFFT plan of rank \p fft_rank over ND Views.
///
/// The handle is allocated with cufftCreate, bound to the CUDA stream of
/// \p exec_space, and then populated in place with cufftMakePlanMany.
/// cufftPlanMany must not be used on a handle obtained from cufftCreate: it
/// allocates a brand-new handle, which would leak the created one and drop
/// the stream that was attached to it.
///
/// The input and output distances between batches are the products of the
/// corresponding extents returned by get_extents; strides are fixed to 1.
///
/// \param exec_space [in] Execution space whose CUDA stream the plan uses
/// \param plan [out] Receives the newly created cuFFT plan handle
/// \param in [in] Input View, forwarded to get_extents
/// \param out [in] Output View, forwarded to get_extents
/// \param axes [in] Axes of the transform, forwarded to get_extents
/// \param s [in] Shape argument, forwarded to get_extents
/// \param is_inplace [in] In-place flag, forwarded to get_extents
/// \return Product of the FFT extents returned by get_extents
/// \note The buffer, info and direction arguments are not used by this
/// overload. Failures are reported through KOKKOSFFT_THROW_IF.
template <typename ExecutionSpace, typename PlanType, typename InViewType,
          typename OutViewType, typename BufferViewType, typename InfoType,
          std::size_t fft_rank             = 1,
          std::enable_if_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>,
                           std::nullptr_t> = nullptr>
auto create_plan(const ExecutionSpace& exec_space,
                 std::unique_ptr<PlanType>& plan, const InViewType& in,
                 const OutViewType& out, BufferViewType&, InfoType&,
                 Direction /*direction*/, axis_type<fft_rank> axes,
                 shape_type<fft_rank> s, bool is_inplace) {
  static_assert(
      KokkosFFT::Impl::are_operatable_views_v<ExecutionSpace, InViewType,
                                              OutViewType>,
      "create_plan: InViewType and OutViewType must have the same base "
      "floating point type (float/double), the same layout "
      "(LayoutLeft/LayoutRight), "
      "and the same rank. ExecutionSpace must be accessible to the data in "
      "InViewType and OutViewType.");
  // The condition allows equality, so the message says so as well.
  static_assert(InViewType::rank() >= fft_rank,
                "KokkosFFT::_create: Rank of View must be larger than or "
                "equal to Rank of FFT.");
  using in_value_type  = typename InViewType::non_const_value_type;
  using out_value_type = typename OutViewType::non_const_value_type;
  const int rank       = fft_rank;
  constexpr auto type =
      KokkosFFT::Impl::transform_type<ExecutionSpace, in_value_type,
                                      out_value_type>::type();
  auto [in_extents, out_extents, fft_extents, howmany] =
      KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace);
  int idist    = std::accumulate(in_extents.begin(), in_extents.end(), 1,
                                 std::multiplies<>());
  int odist    = std::accumulate(out_extents.begin(), out_extents.end(), 1,
                                 std::multiplies<>());
  int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1,
                                 std::multiplies<>());

  // For the moment, considering the contiguous layout only
  int istride = 1, ostride = 1;

  plan                 = std::make_unique<PlanType>();
  cufftResult cufft_rt = cufftCreate(&(*plan));
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed");

  // Bind the stream to the handle that will actually hold the plan.
  cudaStream_t stream = exec_space.cuda_stream();
  cufft_rt            = cufftSetStream((*plan), stream);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftSetStream failed");

  // Populate the existing handle instead of allocating a second one.
  std::size_t work_size = 0;
  cufft_rt = cufftMakePlanMany((*plan), rank, fft_extents.data(),
                               in_extents.data(), istride, idist,
                               out_extents.data(), ostride, odist, type,
                               howmany, &work_size);
  KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftMakePlanMany failed");
  return fft_size;
}
/// \brief Releases the cuFFT plan handle owned by \p plan.
///
/// Does nothing when \p plan is empty, so it is safe to call for a plan that
/// was never allocated. The info argument is not used by this overload.
///
/// \param plan [in] Owner of the cuFFT plan handle to destroy
template <typename ExecutionSpace, typename PlanType, typename InfoType,
          std::enable_if_t<std::is_same_v<ExecutionSpace, Kokkos::Cuda>,
                           std::nullptr_t> = nullptr>
void destroy_plan_and_info(std::unique_ptr<PlanType>& plan, InfoType&) {
  // An empty pointer holds no handle; dereferencing it would be undefined.
  if (!plan) return;
  // The status is discarded on purpose: this function reports no errors.
  // NOTE(review): presumably called on teardown paths - confirm with callers.
  (void)cufftDestroy(*plan);
}
} // namespace Impl
} // namespace KokkosFFT
#endif