Skip to content

Commit

Permalink
Merge branch 'branch-21.08' into contains-null-namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Jun 16, 2021
2 parents b861d9d + 93ce6c7 commit 76f4fb2
Show file tree
Hide file tree
Showing 32 changed files with 2,029 additions and 335 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ add_library(cudf
src/copying/gather.cu
src/copying/get_element.cu
src/copying/pack.cpp
src/copying/reverse.cu
src/copying/sample.cu
src/copying/scatter.cu
src/copying/shift.cu
Expand Down
30 changes: 30 additions & 0 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,36 @@ std::unique_ptr<table> gather(
out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Reverses the rows within a table.
 *
 * Creates a new table that is the reverse of @p source_table: row `i` of the
 * result is row `num_rows - i - 1` of the input, for every column.
 *
 * Example (each inner list is one column of the table):
 * ```
 * source = [[4,5,6], [7,8,9], [10,11,12]]
 * return = [[6,5,4], [9,8,7], [12,11,10]]
 * ```
 *
 * @param source_table Table that will be reversed
 * @param mr Device memory resource used to allocate the returned table's device memory
 * @return Newly allocated table holding the rows of @p source_table in reverse order
 */
std::unique_ptr<table> reverse(
table_view const& source_table,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Reverses the elements of a column.
 *
 * Creates a new column that is the reverse of @p source_column.
 *
 * Example:
 * ```
 * source = [4,5,6]
 * return = [6,5,4]
 * ```
 *
 * @param source_column Column that will be reversed
 * @param mr Device memory resource used to allocate the returned column's device memory
 * @return Newly allocated column holding the elements of @p source_column in reverse order
 */
std::unique_ptr<column> reverse(
column_view const& source_column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Scatters the rows of the source table into a copy of the target table
* according to a scatter map.
Expand Down
68 changes: 68 additions & 0 deletions cpp/src/copying/reverse.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/column/column_view.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_output_iterator.h>
#include <thrust/scan.h>

namespace cudf {
namespace detail {
/**
 * @brief Builds a new table holding the rows of `source_table` in reverse order.
 *
 * A lazily evaluated gather map yields `num_rows - 1, num_rows - 2, ..., 0`;
 * `gather` then materializes the output table from that map.
 *
 * @param source_table Table whose rows are reversed
 * @param stream Stream handed to `gather`
 * @param mr Memory resource handed to `gather`
 * @return Table produced by the gather
 */
std::unique_ptr<table> reverse(table_view const& source_table,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
{
  size_type const row_count = source_table.num_rows();

  // Position idx of the gather map selects source row (row_count - idx - 1).
  auto map_begin = make_counting_transform_iterator(
    0, [row_count] __device__(auto idx) { return row_count - idx - 1; });
  auto map_end = map_begin + row_count;

  // Every generated index lies in [0, row_count), so bounds checking is not requested.
  return gather(source_table, map_begin, map_end, out_of_bounds_policy::DONT_CHECK, stream, mr);
}

/**
 * @brief Builds a new column holding the elements of `source_column` in reverse order.
 *
 * The column is wrapped in a single-column table and reversed through the
 * table overload in this namespace, so the caller's `stream` and `mr` are
 * honored instead of silently falling back to the public API's defaults.
 *
 * @param source_column Column whose elements are reversed
 * @param stream Stream forwarded to the table overload
 * @param mr Memory resource forwarded to the table overload
 * @return The single column of the reversed one-column table
 */
std::unique_ptr<column> reverse(column_view const& source_column,
                                rmm::cuda_stream_view stream,
                                rmm::mr::device_memory_resource* mr)
{
  // Call the detail overload (not the public cudf::reverse) so `stream` and `mr` are not dropped.
  auto reversed_columns = detail::reverse(table_view({source_column}), stream, mr)->release();

  // front() yields an lvalue reference; an explicit move is required to transfer ownership.
  return std::move(reversed_columns.front());
}
} // namespace detail

// Public entry point for reversing a table: opens a function range and
// delegates to the detail implementation on rmm::cuda_stream_default.
std::unique_ptr<table> reverse(table_view const& source_table, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  auto const default_stream = rmm::cuda_stream_default;
  return detail::reverse(source_table, default_stream, mr);
}

// Public entry point for reversing a column: opens a function range and
// delegates to the detail implementation on rmm::cuda_stream_default.
std::unique_ptr<column> reverse(column_view const& source_column,
                                rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  auto const default_stream = rmm::cuda_stream_default;
  return detail::reverse(source_column, default_stream, mr);
}
} // namespace cudf
9 changes: 8 additions & 1 deletion cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,8 @@ orc_streams::orc_stream_offsets orc_streams::compute_offsets(
// Everything else uses RLE
return true;
}();
// non-RLE and RLE streams are separated in the buffer that stores encoded data
// The computed offsets do not take the streams of the other type into account
if (is_rle_data) {
strm_offsets[i] = rle_data_size;
rle_data_size += (stream.length * num_rowgroups + 7) & ~7;
Expand Down Expand Up @@ -681,6 +683,10 @@ encoded_data writer::impl::encode_columns(const table_device_view &view,
: (((stripe_dict->num_strings + 0x1ff) >> 9) * (512 * 4 + 2));
if (stripe.id == 0) {
strm.data_ptrs[strm_type] = encoded_data.data() + stream_offsets.offsets[strm_id];
// Dictionary lengths are encoded as RLE, which are all stored after non-RLE data:
// include non-RLE data size in the offset only in that case
if (strm_type == gpu::CI_DATA2 && ck.encoding_kind == DICTIONARY_V2)
strm.data_ptrs[strm_type] += stream_offsets.non_rle_data_size;
} else {
auto const &strm_up = col_streams[stripe_dict[-dict_stride].start_chunk];
strm.data_ptrs[strm_type] =
Expand Down Expand Up @@ -710,7 +716,8 @@ encoded_data writer::impl::encode_columns(const table_device_view &view,
: (col_streams[rg_idx - 1].data_ptrs[strm_type] +
col_streams[rg_idx - 1].lengths[strm_type]);
} else {
strm.lengths[strm_type] = streams[strm_id].length;
strm.lengths[strm_type] = streams[strm_id].length;
// RLE encoded streams are stored after all non-RLE streams
strm.data_ptrs[strm_type] = encoded_data.data() + stream_offsets.non_rle_data_size +
stream_offsets.offsets[strm_id] +
streams[strm_id].length * rg_idx;
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ ConfigureTest(COPYING_TEST
copying/shift_tests.cpp
copying/slice_tests.cpp
copying/split_tests.cpp
copying/utility_tests.cpp)
copying/utility_tests.cpp
copying/reverse_tests.cpp)

###################################################################################################
# - utilities tests -------------------------------------------------------------------------------
Expand Down
180 changes: 180 additions & 0 deletions cpp/tests/copying/reverse_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/tabulate.h>

// Typed fixture shared by the fixed-width reverse tests below.
template <typename T>
class ReverseTypedTestFixture : public cudf::test::BaseFixture {
};

// Registers the fixture's typed tests against the cudf::test::AllTypes type list.
TYPED_TEST_CASE(ReverseTypedTestFixture, cudf::test::AllTypes);
// Reversing a one-column table built from 0..9 must yield one column built from 9..0.
TYPED_TEST(ReverseTypedTestFixture, ReverseTable)
{
  using T = TypeParam;
  constexpr cudf::size_type num_values{10};

  // Input column: elements generated from the ascending sequence 0, 1, ..., num_values - 1.
  auto ascending = thrust::make_counting_iterator(0);
  auto input = cudf::test::fixed_width_column_wrapper<T, int32_t>(ascending, ascending + num_values);

  // Expected column: elements generated from the descending sequence num_values - 1, ..., 0.
  auto descending = cudf::detail::make_counting_transform_iterator(
    0, [num_values] __device__(auto idx) { return num_values - idx - 1; });
  using source_type = typename decltype(descending)::value_type;
  auto expected =
    cudf::test::fixed_width_column_wrapper<T, source_type>(descending, descending + num_values);

  auto const result = cudf::reverse(cudf::table_view{{input}});

  EXPECT_EQ(result->num_columns(), 1);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), expected, true);
}

// Reversing a column built from 0..9 through the column overload must yield 9..0.
TYPED_TEST(ReverseTypedTestFixture, ReverseColumn)
{
  using T = TypeParam;
  constexpr cudf::size_type num_values{10};

  // Input column: elements generated from the ascending sequence 0, 1, ..., num_values - 1.
  auto ascending = thrust::make_counting_iterator(0);
  auto input = cudf::test::fixed_width_column_wrapper<T, int32_t>(ascending, ascending + num_values);

  // Expected column: elements generated from the descending sequence num_values - 1, ..., 0.
  auto descending = cudf::detail::make_counting_transform_iterator(
    0, [num_values] __device__(auto idx) { return num_values - idx - 1; });
  using source_type = typename decltype(descending)::value_type;
  auto expected =
    cudf::test::fixed_width_column_wrapper<T, source_type>(descending, descending + num_values);

  auto const reversed = cudf::reverse(input);

  CUDF_TEST_EXPECT_COLUMNS_EQUAL(reversed->view(), expected, true);
}

// Reversing a nullable column must reverse both the values and the validity mask.
TYPED_TEST(ReverseTypedTestFixture, ReverseNullable)
{
  using T = TypeParam;
  constexpr cudf::size_type num_values{20};

  // Source values 1..num_values; even positions are valid, odd positions are null.
  std::vector<int64_t> source_values(num_values);
  std::iota(source_values.begin(), source_values.end(), 1);

  thrust::host_vector<bool> source_valids(num_values);
  thrust::tabulate(thrust::seq, source_valids.begin(), source_valids.end(), [](auto idx) {
    return idx % 2 == 0;
  });

  // Expected values: the source values walked back-to-front and converted to T.
  std::vector<T> reversed_values(source_values.size());
  std::transform(source_values.rbegin(),
                 source_values.rend(),
                 reversed_values.begin(),
                 [](auto value) { return cudf::test::make_type_param_scalar<T>(value); });

  // Expected validity: the source mask in reverse order.
  thrust::host_vector<bool> reversed_valids(source_valids.size());
  std::reverse_copy(source_valids.begin(), source_valids.end(), reversed_valids.begin());

  cudf::test::fixed_width_column_wrapper<T, int64_t> input(
    source_values.begin(), source_values.end(), source_valids.begin());
  cudf::test::fixed_width_column_wrapper<T> expected(
    reversed_values.begin(), reversed_values.end(), reversed_valids.begin());

  cudf::table_view input_table{{input}};
  auto result = cudf::reverse(input_table);

  EXPECT_EQ(result->num_columns(), 1);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), expected);
}

// Reversing an empty fixed-width column must yield an empty column.
TYPED_TEST(ReverseTypedTestFixture, ZeroSizeInput)
{
  using T = TypeParam;

  // An iterator range whose begin and end coincide describes zero elements.
  auto empty_range = thrust::make_counting_iterator(0);
  cudf::test::fixed_width_column_wrapper<T, int32_t> input(empty_range, empty_range);
  cudf::test::fixed_width_column_wrapper<T, int32_t> expected(empty_range, empty_range);

  cudf::table_view input_table{{input}};
  auto result = cudf::reverse(input_table);

  EXPECT_EQ(result->num_columns(), 1);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), expected);
}

// Fixture for the reverse tests that operate on strings columns.
class ReverseStringTestFixture : public cudf::test::BaseFixture {
};

// Reversing a nullable strings column must reverse both the strings and the validity mask.
TEST_F(ReverseStringTestFixture, ReverseNullable)
{
  constexpr cudf::size_type num_values{20};

  // Source strings "#0", "#1", ...; even positions are valid, odd positions are null.
  std::vector<std::string> source_strings(num_values);
  thrust::tabulate(thrust::seq, source_strings.begin(), source_strings.end(), [](auto idx) {
    return "#" + std::to_string(idx);
  });

  thrust::host_vector<bool> source_valids(num_values);
  thrust::tabulate(thrust::seq, source_valids.begin(), source_valids.end(), [](auto idx) {
    return idx % 2 == 0;
  });

  // Expected data: the source strings and validity mask in reverse order.
  std::vector<std::string> reversed_strings(source_strings.size());
  std::reverse_copy(source_strings.begin(), source_strings.end(), reversed_strings.begin());

  thrust::host_vector<bool> reversed_valids(source_valids.size());
  std::reverse_copy(source_valids.begin(), source_valids.end(), reversed_valids.begin());

  auto input = cudf::test::strings_column_wrapper(
    source_strings.begin(), source_strings.end(), source_valids.begin());
  auto expected = cudf::test::strings_column_wrapper(
    reversed_strings.begin(), reversed_strings.end(), reversed_valids.begin());

  cudf::table_view input_table{{input}};
  auto result = cudf::reverse(input_table);

  EXPECT_EQ(result->num_columns(), 1);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), expected);
}

// Reversing an empty strings column must yield an empty strings column.
TEST_F(ReverseStringTestFixture, ZeroSizeInput)
{
  // Both the input and the expected column are built from the same empty range.
  std::vector<std::string> input_values{};
  auto input    = cudf::test::strings_column_wrapper(input_values.begin(), input_values.end());
  auto expected = cudf::test::strings_column_wrapper(input_values.begin(), input_values.end());

  cudf::table_view input_table{{input}};
  auto p_ret = cudf::reverse(input_table);

  EXPECT_EQ(p_ret->num_columns(), 1);
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(p_ret->view().column(0), expected);
}
3 changes: 2 additions & 1 deletion python/.flake8
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ ignore =
E203

[pydocstyle]
match = ^.*abc\.py$
match = ^(.*abc\.py|types\.py)$
#match = ^(types\.py)$
# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather than include using match-dir.
match-dir = ^(?!ci|cpp|python/dask_cudf|python/cudf_kafka|python/custreamz).*$
# In addition to numpy style, we additionally ignore magic methods (D105) and newlines before docstrings (D204).
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,16 @@
UInt16Index,
UInt32Index,
UInt64Index,
cut,
from_pandas,
interval_range,
merge,
cut,
)
from cudf.core.algorithms import factorize
from cudf.core.dtypes import (
CategoricalDtype,
Decimal64Dtype,
IntervalDtype,
ListDtype,
StructDtype,
)
Expand Down
Loading

0 comments on commit 76f4fb2

Please sign in to comment.