diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 86ec24c1b7b..624293ad87c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -343,6 +343,7 @@ add_library(
src/lists/lists_column_factories.cu
src/lists/lists_column_view.cu
src/lists/segmented_sort.cu
+ src/lists/sequences.cu
src/merge/merge.cu
src/partitioning/partitioning.cu
src/partitioning/round_robin.cu
diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp
index aff0d20a467..905a897eb40 100644
--- a/cpp/include/cudf/filling.hpp
+++ b/cpp/include/cudf/filling.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -169,7 +169,7 @@ std::unique_ptr
repeat(
* @param init First value in the sequence
* @param step Increment value
* @param mr Device memory resource used to allocate the returned column's device memory
- * @return std::unique_ptr The result table containing the sequence
+ * @return The result column containing the generated sequence
*/
std::unique_ptr sequence(
size_type size,
@@ -195,7 +195,7 @@ std::unique_ptr sequence(
* @param size Size of the output column
* @param init First value in the sequence
* @param mr Device memory resource used to allocate the returned column's device memory
- * @return std::unique_ptr The result table containing the sequence
+ * @return The result column containing the generated sequence
*/
std::unique_ptr sequence(
size_type size,
@@ -223,7 +223,7 @@ std::unique_ptr sequence(
* @param months Months to increment
* @param mr Device memory resource used to allocate the returned column's device memory
*
- * @returns Timestamps column with sequences of months.
+ * @return Timestamps column with sequences of months.
*/
std::unique_ptr calendrical_month_sequence(
size_type size,
diff --git a/cpp/include/cudf/lists/filling.hpp b/cpp/include/cudf/lists/filling.hpp
new file mode 100644
index 00000000000..74a4dac1e10
--- /dev/null
+++ b/cpp/include/cudf/lists/filling.hpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+
+#include
+
+namespace cudf::lists {
+/**
+ * @addtogroup lists_filling
+ * @{
+ * @file
+ * @brief Column APIs for generating a sequence of values in each row of a lists column
+ */
+
+/**
+ * @brief Create a lists column in which each row contains a sequence of values specified by a tuple
+ * of (`start`, `size`) parameters.
+ *
+ * Create a lists column in which each row is a sequence of values starting from a `start` value
+ * and incrementing by one, with its cardinality specified by a `size` value. The `start` and
+ * `size` values used to generate each list are taken from the corresponding row of the input
+ * @p starts and @p sizes columns.
+ *
+ * - @p starts must be a column of a numeric (non-boolean) or duration type.
+ * - @p sizes must be a column of integer types.
+ * - All the input columns must not have nulls.
+ * - If any row of the @p sizes column contains a negative value, the output is undefined.
+ *
+ * @code{.pseudo}
+ * starts = [0, 1, 2, 3, 4]
+ * sizes = [0, 2, 2, 1, 3]
+ *
+ * output = [ [], [1, 2], [2, 3], [3], [4, 5, 6] ]
+ * @endcode
+ *
+ * @throws cudf::logic_error if @p sizes column is not of integer types.
+ * @throws cudf::logic_error if any input column has nulls.
+ * @throws cudf::logic_error if @p starts and @p sizes columns do not have the same size.
+ *
+ * @param starts First values in the result sequences.
+ * @param sizes Numbers of values in the result sequences.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return The result column containing generated sequences.
+ */
+std::unique_ptr sequences(
+ column_view const& starts,
+ column_view const& sizes,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Create a lists column in which each row contains a sequence of values specified by a tuple
+ * of (`start`, `step`, `size`) parameters.
+ *
+ * Create a lists column in which each row is a sequence of values starting from a `start` value
+ * and incrementing by a `step` value, with its cardinality specified by a `size` value. The
+ * `start`, `step`, and `size` values used to generate each list are taken from the corresponding
+ * row of the input @p starts, @p steps, and @p sizes columns.
+ *
+ * - @p starts must be a column of a numeric (non-boolean) or duration type.
+ * - @p sizes must be a column of integer types.
+ * - @p starts and @p steps columns must have the same type.
+ * - All the input columns must not have nulls.
+ * - If any row of the @p sizes column contains a negative value, the output is undefined.
+ *
+ * @code{.pseudo}
+ * starts = [0, 1, 2, 3, 4]
+ * steps = [2, 1, 1, 1, -3]
+ * sizes = [0, 2, 2, 1, 3]
+ *
+ * output = [ [], [1, 2], [2, 3], [3], [4, 1, -2] ]
+ * @endcode
+ *
+ * @throws cudf::logic_error if @p sizes column is not of integer types.
+ * @throws cudf::logic_error if any input column has nulls.
+ * @throws cudf::logic_error if @p starts and @p steps columns have different types.
+ * @throws cudf::logic_error if @p starts, @p steps, and @p sizes columns do not have the same size.
+ *
+ * @param starts First values in the result sequences.
+ * @param steps Increment values for the result sequences.
+ * @param sizes Numbers of values in the result sequences.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return The result column containing generated sequences.
+ */
+std::unique_ptr sequences(
+ column_view const& starts,
+ column_view const& steps,
+ column_view const& sizes,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/** @} */ // end of group
+} // namespace cudf::lists
diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu
new file mode 100644
index 00000000000..5007918441b
--- /dev/null
+++ b/cpp/src/lists/sequences.cu
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+namespace cudf::lists {
+namespace detail {
+namespace {
+/**
+ * @brief Functor that computes one output element from its flat index in the output child column.
+ *
+ * Given an element index, the functor finds the list that contains it by searching `offsets`,
+ * then returns `starts[list] + step * (position of the element within its list)`. When `steps`
+ * is a null pointer, a step of `T{1}` is used for every list.
+ */
+template
+struct tabulator {
+ size_type const n_lists;  // number of lists; bounds the offsets search in operator()
+ size_type const n_elements;  // total number of output elements; not read by this functor
+
+ T const* const starts;  // first value of each list
+ T const* const steps;  // increment of each list, or nullptr to step by T{1}
+ offset_type const* const offsets;  // start position of each list in the output child column
+
+ // Returns `x` multiplied by `times`, with `times` converted through a static_cast first.
+ // NOTE(review): the enable_if condition is not visible here; presumably this overload is
+ // selected for non-duration types - confirm.
+ template
+ static std::enable_if_t(), T> __device__ multiply(U x, size_type times)
+ {
+ return x * static_cast(times);
+ }
+
+ // Returns a `T` constructed from `x.count() * times`.
+ // NOTE(review): presumably the overload selected for duration types - confirm.
+ template
+ static std::enable_if_t(), T> __device__ multiply(U x, size_type times)
+ {
+ return T{x.count() * times};
+ }
+
+ // Computes the value stored at position `idx` of the output child column.
+ auto __device__ operator()(size_type idx) const
+ {
+ // Find the first of the leading `n_lists` offsets that is strictly greater than `idx`;
+ // the list containing `idx` is the one immediately before it.
+ auto const list_idx_end = thrust::upper_bound(thrust::seq, offsets, offsets + n_lists, idx);
+ auto const list_idx = thrust::distance(offsets, list_idx_end) - 1;
+ auto const list_offset = offsets[list_idx];
+ auto const list_step = steps ? steps[list_idx] : T{1};
+ // `idx - list_offset` is the position of the element within its own list.
+ return starts[list_idx] + multiply(list_step, idx - list_offset);
+ }
+};
+
+/**
+ * @brief Fallback implementation selected for element types that are not supported.
+ *
+ * The supported types are handled by the partial specialization of this struct; any other type
+ * ends up here and the call fails unconditionally.
+ *
+ * @tparam T Element type of the `starts` column.
+ * @tparam Enable SFINAE slot used by the specialization for supported types.
+ */
+template <typename T, typename Enable = void>
+struct sequences_functor {
+  // Accepts (and ignores) any arguments so that the type dispatcher can instantiate this
+  // fallback with the same call signature as the supported specialization.
+  template <typename... Args>
+  static std::unique_ptr<column> invoke(Args&&...)
+  {
+    // No aggregation is involved in this API, so the message only names the real problem.
+    CUDF_FAIL("Unsupported data type for generating per-list sequences.");
+  }
+};
+
+/**
+ * @brief Callable handed to the type dispatcher: forwards all of its arguments, unchanged, to
+ * `sequences_functor::invoke`.
+ */
+struct sequences_dispatcher {
+ template
+ std::unique_ptr operator()(size_type n_lists,
+ size_type n_elements,
+ column_view const& starts,
+ std::optional const& steps,
+ offset_type const* offsets,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+ {
+ return sequences_functor::invoke(n_lists, n_elements, starts, steps, offsets, stream, mr);
+ }
+};
+
+/**
+ * @brief Compile-time predicate for the element types that sequences can be generated for:
+ * numeric types other than boolean, and duration types.
+ */
+template
+static constexpr bool is_supported()
+{
+ return (cudf::is_numeric() && !cudf::is_boolean()) || cudf::is_duration();
+}
+
+/**
+ * @brief Implementation that generates the child column holding the values of all sequences.
+ *
+ * NOTE(review): the enable_if condition is not fully visible here; presumably this
+ * specialization is selected for the types accepted by `is_supported` - confirm.
+ */
+template
+struct sequences_functor()>> {
+ /**
+ * @brief Allocate a fixed-width column of `n_elements` values and fill it with `tabulator`.
+ *
+ * @param n_lists Number of lists to generate.
+ * @param n_elements Total number of elements of all the lists.
+ * @param starts First value of each list.
+ * @param steps Optional increment of each list; when absent, the tabulator steps by `T{1}`.
+ * @param offsets Offsets of the output lists, used to map an element index to its list.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column.
+ * @return The child column containing the values of all the sequences.
+ */
+ static std::unique_ptr invoke(size_type n_lists,
+ size_type n_elements,
+ column_view const& starts,
+ std::optional const& steps,
+ offset_type const* offsets,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+ {
+ auto result =
+ make_fixed_width_column(starts.type(), n_elements, mask_state::UNALLOCATED, stream, mr);
+ if (starts.is_empty()) { return result; }
+
+ auto const result_begin = result->mutable_view().template begin();
+
+ // Raw pointers are sufficient to access the start and step values; a column_device_view is
+ // not needed because nulls never have to be checked and only numeric and duration types
+ // are supported.
+ auto const starts_begin = starts.template begin();
+ auto const steps_begin = steps ? steps.value().template begin() : nullptr;
+
+ // Each output element is computed independently from its own index.
+ auto const op = tabulator{n_lists, n_elements, starts_begin, steps_begin, offsets};
+ thrust::tabulate(rmm::exec_policy(stream), result_begin, result_begin + n_elements, op);
+
+ return result;
+ }
+};
+
+/**
+ * @brief Create a lists column with zero rows whose child column has the given type.
+ *
+ * @param child_type Data type of the (empty) child column.
+ * @param stream CUDA stream passed to the lists column factory and the empty null mask buffer.
+ * @param mr Device memory resource passed to the lists column factory.
+ * @return A lists column with no rows, no nulls, empty offsets and an empty child.
+ */
+std::unique_ptr make_empty_lists_column(data_type child_type,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ auto offsets = make_empty_column(data_type(type_to_id()));
+ auto child = make_empty_column(child_type);
+ return make_lists_column(
+ 0, std::move(offsets), std::move(child), 0, rmm::device_buffer(0, stream, mr), stream, mr);
+}
+
+/**
+ * @brief Common implementation shared by both `sequences` overloads.
+ *
+ * Validates the inputs, builds the offsets of the output lists from `sizes`, and dispatches on
+ * the type of `starts` to generate the child column holding the values of all sequences.
+ *
+ * @throws cudf::logic_error if any input column has nulls, if the input columns do not all have
+ * the same number of rows, if `sizes` is not of an integer type, or if `steps` is given and its
+ * type differs from the type of `starts`.
+ *
+ * @param starts First value of each sequence.
+ * @param steps Optional increment of each sequence; when absent every sequence steps by one.
+ * @param sizes Number of values of each sequence.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return A lists column with one generated sequence per input row.
+ */
+std::unique_ptr<column> sequences(column_view const& starts,
+                                  std::optional<column_view> const& steps,
+                                  column_view const& sizes,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr)
+{
+  CUDF_EXPECTS(!starts.has_nulls() && !sizes.has_nulls(),
+               "starts and sizes input columns must not have nulls.");
+  CUDF_EXPECTS(starts.size() == sizes.size(),
+               "starts and sizes input columns must have the same number of rows.");
+  CUDF_EXPECTS(cudf::is_index_type(sizes.type()), "Input sizes column must be of integer types.");
+
+  if (steps) {
+    auto const& steps_cv = steps.value();
+    CUDF_EXPECTS(!steps_cv.has_nulls(), "steps input column must not have nulls.");
+    CUDF_EXPECTS(starts.size() == steps_cv.size(),
+                 "starts and steps input columns must have the same number of rows.");
+    CUDF_EXPECTS(starts.type() == steps_cv.type(),
+                 "starts and steps input columns must have the same type.");
+  }
+
+  auto const n_lists = starts.size();
+  if (n_lists == 0) { return make_empty_lists_column(starts.type(), stream, mr); }
+
+  // Generate list offsets for the output.
+  auto list_offsets = make_numeric_column(
+    data_type(type_to_id<offset_type>()), n_lists + 1, mask_state::UNALLOCATED, stream, mr);
+  auto const offsets_begin  = list_offsets->mutable_view().template begin<offset_type>();
+  auto const sizes_input_it = cudf::detail::indexalator_factory::make_input_iterator(sizes);
+
+  // The offsets are `offsets[0] = 0` and `offsets[i + 1] = sizes[0] + ... + sizes[i]`.
+  // `sizes` has only `n_lists` rows, so an exclusive scan over `n_lists + 1` inputs would read
+  // one element past its end. Instead, zero the first offset explicitly and write an inclusive
+  // scan of exactly `n_lists` sizes into the remaining `n_lists` offsets.
+  CUDA_TRY(cudaMemsetAsync(offsets_begin, 0, sizeof(offset_type), stream.value()));
+  thrust::inclusive_scan(
+    rmm::exec_policy(stream), sizes_input_it, sizes_input_it + n_lists, offsets_begin + 1);
+
+  // The last offset is the total number of elements across all the output lists.
+  auto const n_elements = cudf::detail::get_value<size_type>(list_offsets->view(), n_lists, stream);
+
+  auto child = type_dispatcher(starts.type(),
+                               sequences_dispatcher{},
+                               n_lists,
+                               n_elements,
+                               starts,
+                               steps,
+                               offsets_begin,
+                               stream,
+                               mr);
+
+  // The inputs are null-free, so the result has no nulls and needs no null mask.
+  return make_lists_column(n_lists,
+                           std::move(list_offsets),
+                           std::move(child),
+                           0,
+                           rmm::device_buffer(0, stream, mr),
+                           stream,
+                           mr);
+}
+
+} // anonymous namespace
+
+/**
+ * @brief Stream-accepting implementation of `sequences(starts, sizes)`.
+ *
+ * Calls the common implementation without a steps column (`std::nullopt`).
+ *
+ * @param starts First values in the result sequences.
+ * @param sizes Numbers of values in the result sequences.
+ * @param stream CUDA stream forwarded to the common implementation.
+ * @param mr Device memory resource forwarded to the common implementation.
+ * @return The result column containing generated sequences.
+ */
+std::unique_ptr sequences(column_view const& starts,
+ column_view const& sizes,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ return sequences(starts, std::nullopt, sizes, stream, mr);
+}
+
+/**
+ * @brief Stream-accepting implementation of `sequences(starts, steps, sizes)`.
+ *
+ * Wraps `steps` in a `std::optional` and calls the common implementation.
+ *
+ * @param starts First values in the result sequences.
+ * @param steps Increment values for the result sequences.
+ * @param sizes Numbers of values in the result sequences.
+ * @param stream CUDA stream forwarded to the common implementation.
+ * @param mr Device memory resource forwarded to the common implementation.
+ * @return The result column containing generated sequences.
+ */
+std::unique_ptr sequences(column_view const& starts,
+ column_view const& steps,
+ column_view const& sizes,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ return sequences(starts, std::optional{steps}, sizes, stream, mr);
+}
+
+} // namespace detail
+
+// Public entry point for sequences with an implicit step of one: forwards to the detail
+// implementation using `rmm::cuda_stream_default`.
+std::unique_ptr sequences(column_view const& starts,
+ column_view const& sizes,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::sequences(starts, sizes, rmm::cuda_stream_default, mr);
+}
+
+// Public entry point for sequences with per-row steps: forwards to the detail implementation
+// using `rmm::cuda_stream_default`.
+std::unique_ptr sequences(column_view const& starts,
+ column_view const& steps,
+ column_view const& sizes,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::sequences(starts, steps, sizes, rmm::cuda_stream_default, mr);
+}
+
+} // namespace cudf::lists
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index c1c209b2413..d90260400a0 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -442,6 +442,7 @@ ConfigureTest(
lists/drop_list_duplicates_tests.cpp
lists/explode_tests.cpp
lists/extract_tests.cpp
+ lists/sequences_tests.cpp
lists/sort_lists_tests.cpp
)
diff --git a/cpp/tests/lists/sequences_tests.cpp b/cpp/tests/lists/sequences_tests.cpp
new file mode 100644
index 00000000000..2dafeaf5cea
--- /dev/null
+++ b/cpp/tests/lists/sequences_tests.cpp
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace cudf::test::iterators;
+
+// Shorthand aliases for the column wrappers used to build test inputs and expected outputs.
+namespace {
+// Wrapper used to build the expected lists columns.
+template
+using ListsCol = cudf::test::lists_column_wrapper;
+
+// Wrapper used to build fixed-width input columns of the type under test.
+template
+using FWDCol = cudf::test::fixed_width_column_wrapper;
+
+// Wrapper used for the `sizes` columns and for the non-typed tests.
+using IntsCol = cudf::test::fixed_width_column_wrapper;
+} // namespace
+
+/*-----------------------------------------------------------------------------------------------*/
+// Typed fixture: its tests are instantiated once for every type listed in `NumericTypes`.
+template
+class NumericSequencesTypedTest : public cudf::test::BaseFixture {
+};
+using NumericTypes =
+ cudf::test::Concat;
+TYPED_TEST_SUITE(NumericSequencesTypedTest, NumericTypes);
+
+// Basic case: three non-empty sequences, generated first with the implicit step of one and then
+// with an explicit step per row.
+TYPED_TEST(NumericSequencesTypedTest, SimpleTestNoNull)
+{
+ using T = TypeParam;
+
+ auto const starts = FWDCol{1, 2, 3};
+ auto const sizes = IntsCol{5, 3, 4};
+
+ // Sequences with step == 1.
+ {
+ auto const expected =
+ ListsCol{ListsCol{1, 2, 3, 4, 5}, ListsCol{2, 3, 4}, ListsCol{3, 4, 5, 6}};
+ auto const result = cudf::lists::sequences(starts, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+
+ // Sequences with various steps.
+ {
+ auto const steps = FWDCol{1, 3, 2};
+ auto const expected =
+ ListsCol{ListsCol{1, 2, 3, 4, 5}, ListsCol{2, 5, 8}, ListsCol{3, 5, 7, 9}};
+ auto const result = cudf::lists::sequences(starts, steps, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+}
+
+// Rows with a size of zero must produce empty lists, both at the beginning and at the end of
+// the output.
+TYPED_TEST(NumericSequencesTypedTest, ZeroSizesTest)
+{
+ using T = TypeParam;
+
+ auto const starts = FWDCol{1, 2, 3};
+ auto const sizes = IntsCol{0, 3, 0};
+
+ // Sequences with step == 1.
+ {
+ auto const expected = ListsCol{ListsCol{}, ListsCol{2, 3, 4}, ListsCol{}};
+ auto const result = cudf::lists::sequences(starts, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+
+ // Sequences with various steps.
+ {
+ auto const steps = FWDCol{1, 3, 2};
+ auto const expected = ListsCol{ListsCol{}, ListsCol{2, 5, 8}, ListsCol{}};
+ auto const result = cudf::lists::sequences(starts, steps, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+}
+
+// The inputs are slices of larger columns, each taken at a different offset. The `dont_care`
+// values lie outside the sliced ranges and must not influence the result.
+TYPED_TEST(NumericSequencesTypedTest, SlicedInputTestNoNulls)
+{
+ using T = TypeParam;
+ constexpr int32_t dont_care{123};
+
+ auto const starts_original =
+ FWDCol{dont_care, dont_care, dont_care, 1, 2, 3, 4, 5, dont_care, dont_care};
+ auto const sizes_original = IntsCol{dont_care, 5, 3, 4, 1, 2, dont_care, dont_care};
+
+ // starts == [1, 2, 3, 4, 5] and sizes == [5, 3, 4, 1, 2] after slicing.
+ auto const starts = cudf::slice(starts_original, {3, 8})[0];
+ auto const sizes = cudf::slice(sizes_original, {1, 6})[0];
+
+ // Sequences with step == 1.
+ {
+ auto const expected = ListsCol{ListsCol{1, 2, 3, 4, 5},
+ ListsCol{2, 3, 4},
+ ListsCol{3, 4, 5, 6},
+ ListsCol{4},
+ ListsCol{5, 6}
+
+ };
+ auto const result = cudf::lists::sequences(starts, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+
+ // Sequences with various steps.
+ {
+ auto const steps_original = FWDCol{dont_care, dont_care, 1, 3, 2, 2, 3, dont_care};
+ // steps == [1, 3, 2, 2, 3] after slicing.
+ auto const steps = cudf::slice(steps_original, {2, 7})[0];
+
+ auto const expected = ListsCol{ListsCol{1, 2, 3, 4, 5},
+ ListsCol{2, 5, 8},
+ ListsCol{3, 5, 7, 9},
+ ListsCol{4},
+ ListsCol{5, 8}
+
+ };
+ auto const result = cudf::lists::sequences(starts, steps, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+}
+
+/*-----------------------------------------------------------------------------------------------*/
+// Data generated using https://www.epochconverter.com/
+// Typed fixture: its tests are instantiated once for every type in `cudf::test::DurationTypes`.
+template
+class DurationSequencesTypedTest : public cudf::test::BaseFixture {
+};
+TYPED_TEST_SUITE(DurationSequencesTypedTest, cudf::test::DurationTypes);
+
+// Start time is 1638477473L - Thursday, December 2, 2021 8:37:53 PM.
+// Used as the first value of every sequence in the duration tests.
+constexpr int64_t start_time = 1638477473L;
+
+// Duration sequences that all begin at `start_time`, with sizes 1, 2 and 3: first with the
+// implicit step of one, then with explicit positive and negative steps.
+TYPED_TEST(DurationSequencesTypedTest, SequencesNoNull)
+{
+ using T = TypeParam;
+
+ auto const starts = FWDCol{start_time, start_time, start_time};
+ auto const sizes = IntsCol{1, 2, 3};
+
+ // Sequences with step == 1.
+ {
+ // Every expected list is a prefix of this host vector.
+ auto const expected_h = std::vector{start_time, start_time + 1L, start_time + 2L};
+ auto const expected =
+ ListsCol{ListsCol{expected_h.begin(), expected_h.begin() + 1},
+ ListsCol{expected_h.begin(), expected_h.begin() + 2},
+ ListsCol{expected_h.begin(), expected_h.begin() + 3}};
+ auto const result = cudf::lists::sequences(starts, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+
+ // Sequences with various steps, including negative.
+ {
+ // The first step (10) is never applied because the first list has a single element.
+ auto const steps = FWDCol{10L, -155L, -13L};
+ auto const expected = ListsCol{
+ ListsCol{start_time},
+ ListsCol{start_time, start_time - 155L},
+ ListsCol{start_time, start_time - 13L, start_time - 13L * 2L}};
+ auto const result = cudf::lists::sequences(starts, steps, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+}
+
+/*-----------------------------------------------------------------------------------------------*/
+// Non-typed fixture for the tests that cover empty input and input validation.
+class NumericSequencesTest : public cudf::test::BaseFixture {
+};
+
+// Input columns with zero rows must produce an empty lists column, with and without steps.
+TEST_F(NumericSequencesTest, EmptyInput)
+{
+ auto const starts = IntsCol{};
+ auto const sizes = IntsCol{};
+ auto const steps = IntsCol{};
+ auto const expected = ListsCol{};
+
+ // Sequences with step == 1.
+ {
+ auto const result = cudf::lists::sequences(starts, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+
+ // Sequences with given steps.
+ {
+ auto const result = cudf::lists::sequences(starts, steps, sizes);
+ CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+ }
+}
+
+// A `sizes` column of an invalid type must be rejected, even when all the inputs are empty.
+// NOTE(review): the element type of the `sizes` wrapper is not visible here; presumably it is a
+// non-integer type - confirm.
+TEST_F(NumericSequencesTest, InvalidSizesInput)
+{
+ auto const starts = IntsCol{};
+ auto const steps = IntsCol{};
+ auto const sizes = FWDCol{};
+
+ EXPECT_THROW(cudf::lists::sequences(starts, sizes), cudf::logic_error);
+ EXPECT_THROW(cudf::lists::sequences(starts, steps, sizes), cudf::logic_error);
+}
+
+// Input columns with differing numbers of rows must be rejected. Each mismatch is also checked
+// in isolation: when both `steps` and `sizes` disagree with `starts`, the call fails on one
+// check and gives no evidence that the other check works.
+TEST_F(NumericSequencesTest, MismatchedColumnSizesInput)
+{
+  auto const starts         = IntsCol{1, 2, 3};
+  auto const steps          = IntsCol{1, 2};
+  auto const sizes          = IntsCol{1, 2, 3, 4};
+  auto const matching_steps = IntsCol{1, 2, 3};
+  auto const matching_sizes = IntsCol{1, 2, 3};
+
+  // Only `sizes` has a different number of rows.
+  EXPECT_THROW(cudf::lists::sequences(starts, sizes), cudf::logic_error);
+  EXPECT_THROW(cudf::lists::sequences(starts, matching_steps, sizes), cudf::logic_error);
+
+  // Only `steps` has a different number of rows.
+  EXPECT_THROW(cudf::lists::sequences(starts, steps, matching_sizes), cudf::logic_error);
+
+  // Both `steps` and `sizes` have a different number of rows.
+  EXPECT_THROW(cudf::lists::sequences(starts, steps, sizes), cudf::logic_error);
+}
+
+// `starts` and `steps` built from different column wrappers must be rejected.
+// NOTE(review): the element type of the `steps` wrapper is not visible here; presumably it
+// differs from the element type of `IntsCol` - confirm.
+TEST_F(NumericSequencesTest, MismatchedColumnTypesInput)
+{
+ auto const starts = IntsCol{1, 2, 3};
+ auto const steps = FWDCol{1, 2, 3};
+ auto const sizes = IntsCol{1, 2, 3};
+
+ EXPECT_THROW(cudf::lists::sequences(starts, steps, sizes), cudf::logic_error);
+}
+
+// Every input column is required to be free of nulls: a null in `starts`, in `sizes`, or in
+// `steps` must make the call fail with a cudf::logic_error.
+TEST_F(NumericSequencesTest, InputHasNulls)
+{
+  constexpr int32_t null{0};
+
+  // One valid column and one column whose first row is null; the two are combined below so
+  // that exactly one argument of each call contains a null.
+  auto const without_nulls = IntsCol{1, 2, 3};
+  auto const with_null     = IntsCol{{null, 2, 3}, null_at(0)};
+
+  // Null in `starts`.
+  EXPECT_THROW(cudf::lists::sequences(with_null, without_nulls), cudf::logic_error);
+
+  // Null in `sizes`.
+  EXPECT_THROW(cudf::lists::sequences(without_nulls, with_null), cudf::logic_error);
+
+  // Null in `steps`.
+  EXPECT_THROW(cudf::lists::sequences(without_nulls, with_null, without_nulls),
+               cudf::logic_error);
+}