From 7d5211322389cfe16aeac729b6b97753f0b1e5a5 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 11 Mar 2022 11:46:30 -0500 Subject: [PATCH 1/5] Fix cudf::shift to handle offset greater than column size --- cpp/src/copying/shift.cu | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index dacc1d07447..38fb16f66f4 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,7 +70,7 @@ struct shift_functor { std::unique_ptr> operator()(Args&&...) { - CUDF_FAIL("shift does not support non-fixed-width types."); + CUDF_FAIL("shift only supports fixed-width or string types."); } template @@ -125,6 +125,7 @@ struct shift_functor { // avoid assigning elements we know to be invalid. if (not scalar_is_valid) { + if (std::abs(offset) > size) { return output; } if (offset > 0) { index_begin = thrust::make_counting_iterator(offset); data = data + offset; From d559fed36073e82871dde5ea110c1c516d2c06d8 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 11 Mar 2022 11:47:09 -0500 Subject: [PATCH 2/5] fix strings specialization logic for offset > size --- cpp/src/strings/copying/shift.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/strings/copying/shift.cu b/cpp/src/strings/copying/shift.cu index 024c8d2924d..bdcf01bd336 100644 --- a/cpp/src/strings/copying/shift.cu +++ b/cpp/src/strings/copying/shift.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -94,6 +94,9 @@ std::unique_ptr shift(strings_column_view const& input, { auto d_fill_str = static_cast(fill_value).value(stream); + // adjust offset when greater than the size of the input + if (std::abs(offset) > input.size()) { offset = input.size(); } + // output offsets column is the same size as the input auto const input_offsets = cudf::detail::slice( From 6043eec429d67591091648a11b6dcd02ffe740ac Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 11 Mar 2022 11:47:29 -0500 Subject: [PATCH 3/5] add gtests for offset > size --- cpp/tests/copying/shift_tests.cpp | 78 +++++++++++++++++-------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/cpp/tests/copying/shift_tests.cpp b/cpp/tests/copying/shift_tests.cpp index 256f9129cbf..47615a584a4 100644 --- a/cpp/tests/copying/shift_tests.cpp +++ b/cpp/tests/copying/shift_tests.cpp @@ -20,16 +20,14 @@ #include #include -#include #include #include +#include #include -#include #include #include -#include using cudf::test::fixed_width_column_wrapper; using TestTypes = cudf::test::Types; @@ -72,28 +70,12 @@ constexpr auto lowest() } template -struct ShiftTest : public cudf::test::BaseFixture { +struct ShiftTestsTyped : public cudf::test::BaseFixture { }; -TYPED_TEST_SUITE(ShiftTest, cudf::test::FixedWidthTypes); +TYPED_TEST_SUITE(ShiftTestsTyped, cudf::test::FixedWidthTypes); -TYPED_TEST(ShiftTest, OneColumnEmpty) -{ - using T = TypeParam; - - std::vector vals{}; - std::vector mask{}; - - auto input = fixed_width_column_wrapper{}; - auto expected = fixed_width_column_wrapper(vals.begin(), vals.end(), mask.begin()); - - auto fill = make_scalar(); - auto actual = cudf::shift(input, 5, *fill); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *actual); -} - -TYPED_TEST(ShiftTest, TwoColumnsEmpty) +TYPED_TEST(ShiftTestsTyped, ColumnEmpty) { using T = TypeParam; @@ -109,7 +91,7 @@ TYPED_TEST(ShiftTest, TwoColumnsEmpty) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *actual); } -TYPED_TEST(ShiftTest, OneColumn) +TYPED_TEST(ShiftTestsTyped, NonNullColumn) { using T = TypeParam; @@ -134,7 +116,7 @@ TYPED_TEST(ShiftTest, OneColumn) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *actual); } -TYPED_TEST(ShiftTest, OneColumnNegativeShift) +TYPED_TEST(ShiftTestsTyped, NegativeShift) { using T = TypeParam; @@ -159,7 +141,7 @@ TYPED_TEST(ShiftTest, OneColumnNegativeShift) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *actual); } -TYPED_TEST(ShiftTest, OneColumnNullFill) +TYPED_TEST(ShiftTestsTyped, NullScalar) { using T = TypeParam; @@ -186,7 +168,7 @@ TYPED_TEST(ShiftTest, OneColumnNullFill) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *actual); } -TYPED_TEST(ShiftTest, TwoColumnsNullableInput) +TYPED_TEST(ShiftTestsTyped, NullableColumn) { using T = TypeParam; @@ -199,25 +181,21 @@ TYPED_TEST(ShiftTest, TwoColumnsNullableInput) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *actual); } -TYPED_TEST(ShiftTest, MismatchFillValueDtypes) +TYPED_TEST(ShiftTestsTyped, MismatchFillValueDtypes) { using T = TypeParam; - if (std::is_same_v) { return; } - auto input = fixed_width_column_wrapper{}; - auto fill = make_scalar(); + auto fill = cudf::string_scalar(""); - std::unique_ptr output; - - EXPECT_THROW(output = cudf::shift(input, 5, *fill), cudf::logic_error); + EXPECT_THROW(cudf::shift(input, 5, fill), cudf::logic_error); } -struct ShiftTestNonFixedWidth : public cudf::test::BaseFixture { +struct ShiftTests : public cudf::test::BaseFixture { }; -TEST_F(ShiftTestNonFixedWidth, StringsShiftTest) +TEST_F(ShiftTests, StringsShiftTest) { auto input = cudf::test::strings_column_wrapper({"", "bb", "ccc", "ddddddé", ""}, {0, 1, 1, 1, 0}); @@ -243,3 +221,33 @@ TEST_F(ShiftTestNonFixedWidth, StringsShiftTest) auto sliced_left = cudf::test::strings_column_wrapper({"ccc", "ddddddé", "xx"}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sliced_left, *results); } + +TEST_F(ShiftTests, OffsetGreaterThanSize) +{ + auto const input_str = + cudf::test::strings_column_wrapper({"", "bb", "ccc", "ddé", ""}, {0, 1, 1, 1, 0}); + auto results = cudf::shift(input_str, 6, cudf::string_scalar("xx")); + auto expected_str = cudf::test::strings_column_wrapper({"xx", "xx", "xx", "xx", "xx"}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_str, *results); + results = cudf::shift(input_str, -6, cudf::string_scalar("xx")); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_str, *results); + + results = cudf::shift(input_str, 6, cudf::string_scalar("", false)); + expected_str = cudf::test::strings_column_wrapper({"", "", "", "", ""}, {0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_str, *results); + results = cudf::shift(input_str, -6, cudf::string_scalar("", false)); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_str, *results); + + auto const input = fixed_width_column_wrapper({0, 2, 3, 4, 0}, {0, 1, 1, 1, 0}); + results = cudf::shift(input, 6, cudf::numeric_scalar(9)); + auto expected = fixed_width_column_wrapper({9, 9, 9, 9, 9}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results); + results = cudf::shift(input, -6, cudf::numeric_scalar(9)); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results); + + results = cudf::shift(input, 6, cudf::numeric_scalar(0, false)); + expected = fixed_width_column_wrapper({0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results); + results = cudf::shift(input, -6, cudf::numeric_scalar(0, false)); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *results); +} From acaea132b84a2b754806966b9d77cfdb82a576b4 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 11 Mar 2022 11:47:50 -0500 Subject: [PATCH 4/5] correct doxygen for cudf::shift --- cpp/include/cudf/copying.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 850a11426af..2e559afef4f 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -371,7 +371,7 @@ std::unique_ptr copy_range( * @param fill_value Fill value for indeterminable outputs. * @param mr Device memory resource used to allocate the returned result's device memory * - * @throw cudf::logic_error if @p input dtype is not fixed-with. + * @throw cudf::logic_error if @p input dtype is neither fixed-width nor string type * @throw cudf::logic_error if @p fill_value dtype does not match @p input dtype. */ std::unique_ptr shift( From 6f85bd0c0698cca5024c916bdcef7f976ca41b7c Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 14 Mar 2022 10:25:07 -0400 Subject: [PATCH 5/5] remove temp workaround from python code --- python/cudf/cudf/core/column/column.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 1c1845373e1..acc0c94a54d 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -349,14 +349,6 @@ def _fill( return self def shift(self, offset: int, fill_value: ScalarLike) -> ColumnBase: - # libcudf currently doesn't handle case when offset > len(df) - # ticket to fix the bug in link below: - # https://github.com/rapidsai/cudf/issues/10314 - if abs(offset) > len(self): - if fill_value is None: - return column_empty_like(self, masked=True) - else: - return full(len(self), fill_value, dtype=self.dtype) return libcudf.copying.shift(self, offset, fill_value) @property