Skip to content

Commit

Permalink
Merge pull request #5356 from bdice/fea-repeat-size_type
Browse files Browse the repository at this point in the history
[REVIEW] Change cudf::repeat to use size_type for the number of repeats.
  • Loading branch information
Keith Kraus authored Jun 2, 2020
2 parents 9e238e7 + d78b3a2 commit b72e647
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 35 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- PR #5327 Add `cudf::cross_join` feature
- PR #5204 Concatenate strings columns using row separator as strings column
- PR #5342 Add support for `StringMethods.__getitem__`
- PR #5356 Use `size_type` instead of `scalar` in `cudf::repeat`.

## Improvements
- PR #5245 Add column reduction benchmark
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/repeat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ std::unique_ptr<table> repeat(table_view const& input_table,
cudaStream_t stream = 0);

/**
* @copydoc cudf::repeat(table_view const&, scalar const&,
* @copydoc cudf::repeat(table_view const&, size_type,
* rmm::mr::device_memory_resource*)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<table> repeat(table_view const& input_table,
scalar const& count,
size_type count,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource(),
cudaStream_t stream = 0);

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/filling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,13 @@ std::unique_ptr<table> repeat(
* size_type.
*
* @param input_table Input table
* @param count Non-null scalar of an integral type
* @param count Number of repetitions
* @param mr Device memory resource used to allocate the returned table's device memory.
* @return The result table containing the repetitions
*/
std::unique_ptr<table> repeat(
table_view const& input_table,
scalar const& count,
size_type count,
rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());

/**
Expand Down
16 changes: 7 additions & 9 deletions cpp/src/filling/repeat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -135,22 +135,20 @@ std::unique_ptr<table> repeat(table_view const& input_table,
}

std::unique_ptr<table> repeat(table_view const& input_table,
scalar const& count,
size_type count,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream)
{
CUDF_EXPECTS(count.is_valid(), "count cannot be null");
auto stride = cudf::type_dispatcher(count.type(), count_accessor{&count}, stream);
CUDF_EXPECTS(stride >= 0, "count value should be non-negative");
CUDF_EXPECTS(count >= 0, "count value should be non-negative");
CUDF_EXPECTS(
static_cast<int64_t>(input_table.num_rows()) * stride <= std::numeric_limits<size_type>::max(),
static_cast<int64_t>(input_table.num_rows()) * count <= std::numeric_limits<size_type>::max(),
"The resulting table has more rows than size_type's limit.");

if ((input_table.num_rows() == 0) || (stride == 0)) { return cudf::empty_like(input_table); }
if ((input_table.num_rows() == 0) || (count == 0)) { return cudf::empty_like(input_table); }

auto output_size = input_table.num_rows() * stride;
auto output_size = input_table.num_rows() * count;
auto map_begin = thrust::make_transform_iterator(
thrust::make_counting_iterator(0), [stride] __device__(auto i) { return i / stride; });
thrust::make_counting_iterator(0), [count] __device__(auto i) { return i / count; });
auto map_end = map_begin + output_size;

return gather(input_table, map_begin, map_end, false, mr, stream);
Expand All @@ -168,7 +166,7 @@ std::unique_ptr<table> repeat(table_view const& input_table,
}

std::unique_ptr<table> repeat(table_view const& input_table,
scalar const& count,
size_type count,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
Expand Down
7 changes: 1 addition & 6 deletions cpp/tests/filling/repeat_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,7 @@ TYPED_TEST(RepeatTypedTestFixture, RepeatScalarCount)

cudf::table_view input_table{{input}};

auto p_count = cudf::make_numeric_scalar(cudf::data_type(cudf::INT32));
using T_int = cudf::id_to_type<cudf::INT32>;
using ScalarType = cudf::scalar_type_t<T_int>;
static_cast<ScalarType*>(p_count.get())->set_value(repeat_count);

auto p_ret = cudf::repeat(input_table, *p_count);
auto p_ret = cudf::repeat(input_table, repeat_count);

EXPECT_EQ(p_ret->num_columns(), 1);
cudf::test::expect_columns_equal(p_ret->view().column(0), expected);
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/cpp/filling.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil:

cdef unique_ptr[table] repeat(
const table_view & input,
const scalar & count
size_type count
) except +
15 changes: 4 additions & 11 deletions python/cudf/cudf/_lib/filling.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,8 @@ def fill(Column destination, int begin, int end, Scalar value):
def repeat(Table inp, object count, bool check_count=False):
if isinstance(count, Column):
return _repeat_via_column(inp, count, check_count)

if isinstance(count, Scalar):
return _repeat_via_scalar(inp, count)

raise TypeError(
"Expected `count` to be Column or Scalar but got {}"
.format(type(count))
)
else:
return _repeat_via_size_type(inp, count)


def _repeat_via_column(Table inp, Column count, bool check_count):
Expand All @@ -86,15 +80,14 @@ def _repeat_via_column(Table inp, Column count, bool check_count):
)


def _repeat_via_scalar(Table inp, Scalar count):
def _repeat_via_size_type(Table inp, size_type count):
cdef table_view c_inp = inp.view()
cdef scalar* c_count = count.c_value.get()
cdef unique_ptr[table] c_result

with nogil:
c_result = move(cpp_filling.repeat(
c_inp,
c_count[0]
count
))

return Table.from_unique_ptr(
Expand Down
5 changes: 1 addition & 4 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import cudf
import cudf._lib as libcudf
from cudf._lib.nvtx import annotate
from cudf._lib.scalar import as_scalar
from cudf.core.column import as_column, build_categorical_column
from cudf.utils.dtypes import (
is_categorical_dtype,
Expand Down Expand Up @@ -1140,9 +1139,7 @@ def repeat(self, repeats, axis=None):
return self._repeat(repeats)

def _repeat(self, count):
if is_scalar(count):
count = as_scalar(count)
else:
if not is_scalar(count):
count = as_column(count)

result = self.__class__._from_table(
Expand Down

0 comments on commit b72e647

Please sign in to comment.