sequence.cu
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/filling.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

// thrust::tabulate is used below; include it explicitly rather than relying on transitive includes.
#include <thrust/tabulate.h>

namespace cudf {
namespace detail {
namespace {
// This functor only exists here because using a lambda directly in the tabulate() call generates
// the cryptic __T289 link error. This seems to be related to lambda usage within functions using
// SFINAE.
template <typename T>
struct tabulator {
  cudf::numeric_scalar_device_view<T> const n_init;
  cudf::numeric_scalar_device_view<T> const n_step;

  T __device__ operator()(cudf::size_type i)
  {
    return n_init.value() + (static_cast<T>(i) * n_step.value());
  }
};
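
// Variant of the functor above for the init-only overload of sequence(), where the step is
// implicitly one: element i is simply init + i.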
template <typename T>
struct const_tabulator {
  cudf::numeric_scalar_device_view<T> const n_init;

  T __device__ operator()(cudf::size_type i) { return n_init.value() + static_cast<T>(i); }
};

/**
 * @brief Functor called by the `type_dispatcher` to generate the sequence specified
 * by init and step.
 */
struct sequence_functor {
  template <
    typename T,
    typename std::enable_if_t<cudf::is_numeric<T>() and not cudf::is_boolean<T>()>* = nullptr>
  std::unique_ptr<column> operator()(size_type size,
                                     scalar const& init,
                                     scalar const& step,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
  {
    auto result = make_fixed_width_column(init.type(), size, mask_state::UNALLOCATED, stream, mr);
    auto result_device_view = mutable_column_device_view::create(*result, stream);
    auto n_init =
      get_scalar_device_view(static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(init)));
    auto n_step =
      get_scalar_device_view(static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(step)));

    // Not using thrust::sequence because it requires init and step to be passed as constants, not
    // iterators. To do that we would have to retrieve the scalar values off the GPU, which is
    // undesirable from a performance perspective.
    thrust::tabulate(rmm::exec_policy(stream),
                     result_device_view->begin<T>(),
                     result_device_view->end<T>(),
                     tabulator<T>{n_init, n_step});
    return result;
  }

  template <
    typename T,
    typename std::enable_if_t<cudf::is_numeric<T>() and not cudf::is_boolean<T>()>* = nullptr>
  std::unique_ptr<column> operator()(size_type size,
                                     scalar const& init,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
  {
    auto result = make_fixed_width_column(init.type(), size, mask_state::UNALLOCATED, stream, mr);
    auto result_device_view = mutable_column_device_view::create(*result, stream);
    auto n_init =
      get_scalar_device_view(static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(init)));

    // As above, thrust::tabulate is used instead of thrust::sequence so the init scalar can stay
    // on the device.
    thrust::tabulate(rmm::exec_policy(stream),
                     result_device_view->begin<T>(),
                     result_device_view->end<T>(),
                     const_tabulator<T>{n_init});
    return result;
  }

  template <typename T, typename... Args>
  std::enable_if_t<not cudf::is_numeric<T>() or cudf::is_boolean<T>(), std::unique_ptr<column>>
  operator()(Args&&...)
  {
    CUDF_FAIL("Unsupported sequence scalar type");
  }
};

}  // anonymous namespace
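
// Stream-aware implementations. Input validation (matching scalar types, non-negative size,
// numeric scalars only) happens here before dispatching on the scalar's type.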
std::unique_ptr<column> sequence(size_type size,
                                 scalar const& init,
                                 scalar const& step,
                                 rmm::cuda_stream_view stream,
                                 rmm::mr::device_memory_resource* mr)
{
  CUDF_EXPECTS(init.type() == step.type(), "init and step must be of the same type.");
  CUDF_EXPECTS(size >= 0, "size must be >= 0");
  CUDF_EXPECTS(is_numeric(init.type()), "Input scalar types must be numeric");
  return type_dispatcher(init.type(), sequence_functor{}, size, init, step, stream, mr);
}

std::unique_ptr<column> sequence(
  size_type size,
  scalar const& init,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
  CUDF_EXPECTS(size >= 0, "size must be >= 0");
  CUDF_EXPECTS(is_numeric(init.type()), "init scalar type must be numeric");
  return type_dispatcher(init.type(), sequence_functor{}, size, init, stream, mr);
}

}  // namespace detail
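
// Public API: these overloads forward to the detail implementations on the default CUDA stream.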
std::unique_ptr<column> sequence(size_type size,
                                 scalar const& init,
                                 scalar const& step,
                                 rmm::mr::device_memory_resource* mr)
{
  return detail::sequence(size, init, step, rmm::cuda_stream_default, mr);
}

std::unique_ptr<column> sequence(size_type size,
                                 scalar const& init,
                                 rmm::mr::device_memory_resource* mr)
{
  return detail::sequence(size, init, rmm::cuda_stream_default, mr);
}

}  // namespace cudf
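
// Illustrative usage sketch (an assumption-marked example, not part of the original file):
// building a small arithmetic sequence through the public API declared in <cudf/filling.hpp>.
// The expected contents follow from the functors above (element i == init + i * step).
//
//   cudf::numeric_scalar<int32_t> init{10};
//   cudf::numeric_scalar<int32_t> step{5};
//   auto col = cudf::sequence(4, init, step, rmm::mr::get_current_device_resource());
//   // col is an INT32 column holding [10, 15, 20, 25]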