From ff41841cee58a2e945d39dfb1d11d823393814ed Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 20 Oct 2022 09:04:36 -0700 Subject: [PATCH] Remove validation that requires introspection (#11938) This PR removes optional validation for some APIs. Performing these validations requires data introspection, which we do not want. This PR resolves #5505. Authors: - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) Approvers: - Mark Harris (https://github.com/harrism) - GALI PREM SAGAR (https://github.com/galipremsagar) - Matthew Roeschke (https://github.com/mroeschke) - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/11938 --- cpp/benchmarks/lists/copying/scatter_lists.cu | 2 +- cpp/include/cudf/copying.hpp | 18 ++-- cpp/include/cudf/detail/scatter.cuh | 11 --- cpp/include/cudf/detail/scatter.hpp | 18 +--- cpp/include/cudf/filling.hpp | 8 +- cpp/src/copying/copy.cu | 9 +- cpp/src/copying/scatter.cu | 32 ++----- cpp/src/filling/repeat.cu | 11 +-- cpp/src/groupby/sort/scan.cpp | 6 +- cpp/src/groupby/sort/sort_helper.cu | 1 - cpp/src/partitioning/partitioning.cu | 4 +- cpp/src/rolling/detail/lead_lag_nested.cuh | 1 - .../copying/scatter_list_scalar_tests.cpp | 4 +- .../copying/scatter_struct_scalar_tests.cpp | 4 +- cpp/tests/copying/scatter_tests.cpp | 87 ++++++------------- cpp/tests/filling/repeat_tests.cpp | 23 +---- java/src/main/java/ai/rapids/cudf/Table.java | 38 ++------ java/src/main/native/src/TableJni.cpp | 15 ++-- .../test/java/ai/rapids/cudf/TableTest.java | 15 +--- python/cudf/cudf/_lib/copying.pyx | 22 +++-- python/cudf/cudf/_lib/cpp/copying.pxd | 2 - python/cudf/cudf/_lib/cpp/filling.pxd | 3 +- python/cudf/cudf/_lib/filling.pyx | 9 +- python/cudf/cudf/core/column/column.py | 23 ++--- 24 files changed, 99 insertions(+), 267 deletions(-) diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu index 823693721a0..d86fb0578e5 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists.cu +++ b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -108,7 +108,7 @@ void BM_lists_scatter(::benchmark::State& state) for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, false, mr); + scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, mr); } state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * 2 * diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 1c3ca179d17..79dcaaaf00b 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -140,13 +140,12 @@ std::unique_ptr reverse( * If the same index appears more than once in the scatter map, the result is * undefined. * + * If any values in `scatter_map` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. + * * A negative value `i` in the `scatter_map` is interpreted as `i+n`, where `n` * is the number of rows in the `target` table. * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. - * * @param source The input columns containing values to be scattered into the * target columns * @param scatter_map A non-nullable column of integral indices that maps the @@ -154,8 +153,6 @@ std::unique_ptr reverse( * to or less than the number of elements in the source columns. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ @@ -163,7 +160,6 @@ std::unique_ptr scatter( table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds = false, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -184,9 +180,8 @@ std::unique_ptr
scatter( * If the same index appears more than once in the scatter map, the result is * undefined. * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. + * If any values in `scatter_map` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. * * @param source The input scalars containing values to be scattered into the * target columns @@ -194,8 +189,6 @@ std::unique_ptr
scatter( * the rows in the target table to be replaced by source. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target */ @@ -203,7 +196,6 @@ std::unique_ptr
scatter( std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds = false, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 09b16b11a73..413f4c4dae4 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -390,7 +390,6 @@ std::unique_ptr
scatter( MapIterator scatter_map_begin, MapIterator scatter_map_end, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { @@ -398,16 +397,6 @@ std::unique_ptr
scatter( using MapType = typename thrust::iterator_traits::value_type; - if (check_bounds) { - auto const begin = -target.num_rows(); - auto const end = target.num_rows(); - auto bounds = bounds_checker{begin, end}; - CUDF_EXPECTS( - std::distance(scatter_map_begin, scatter_map_end) == - thrust::count_if(rmm::exec_policy(stream), scatter_map_begin, scatter_map_end, bounds), - "Scatter map index out of bounds"); - } - CUDF_EXPECTS(std::distance(scatter_map_begin, scatter_map_end) <= source.num_rows(), "scatter map size should be <= to number of rows in source"); diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 8c993368ff2..801088b803c 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -45,10 +45,8 @@ namespace detail { * * If the same index appears more than once in the scatter map, the result is * undefined. - * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. + * If any values in `scatter_map` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. * * @param source The input columns containing values to be scattered into the * target columns @@ -57,8 +55,6 @@ namespace detail { * to or less than the number of elements in the source columns. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target @@ -67,7 +63,6 @@ std::unique_ptr
scatter( table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -81,7 +76,6 @@ std::unique_ptr
scatter( table_view const& source, device_span const scatter_map, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -101,9 +95,8 @@ std::unique_ptr
scatter( * If the same index appears more than once in the scatter map, the result is * undefined. * - * @throws cudf::logic_error if `check_bounds == true` and an index exists in - * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in - * the target table. If `check_bounds == false`, the behavior is undefined. + * If any values in `indices` are outside of the interval [-n, n) where `n` + * is the number of rows in the `target` table, behavior is undefined. * * @param source The input scalars containing values to be scattered into the * target columns @@ -111,8 +104,6 @@ std::unique_ptr
scatter( * the rows in the target table to be replaced by source. * @param target The set of columns into which values from the source_table * are to be scattered - * @param check_bounds Optionally perform bounds checking on the values of - * `scatter_map` and throw an error if any of its values are out of bounds. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table's device memory * @return Result of scattering values from source to target @@ -121,7 +112,6 @@ std::unique_ptr
scatter( std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds = false, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp index 5f9d13f9a2c..8688e97ab7e 100644 --- a/cpp/include/cudf/filling.hpp +++ b/cpp/include/cudf/filling.hpp @@ -103,26 +103,22 @@ std::unique_ptr fill( * ``` * @p count should not have null values; should not contain negative values; * and the sum of count elements should not overflow the size_type's limit. - * It is undefined behavior if @p count has negative values or the sum overflows - * and @p check_count is set to false. + * The behavior of this function is undefined if @p count has negative values + * or the sum overflows. * * @throws cudf::logic_error if the data type of @p count is not size_type. * @throws cudf::logic_error if @p input_table and @p count have different * number of rows. * @throws cudf::logic_error if @p count has null values. - * @throws cudf::logic_error if @p check_count is set to true and @p count - * has negative values or the sum of @p count elements overflows. * * @param input_table Input table * @param count Non-nullable column of an integral type - * @param check_count Whether to check count (negative values and overflow) * @param mr Device memory resource used to allocate the returned table's device memory * @return The result table containing the repetitions */ std::unique_ptr
repeat( table_view const& input_table, column_view const& count, - bool check_count = false, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 5585eac923c..7e5b9288628 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -180,7 +180,6 @@ std::unique_ptr scatter_gather_based_if_else(cudf::column_view const& lh table_view{std::vector{scatter_src_lhs->get_column(0).view()}}, gather_map, table_view{std::vector{rhs}}, - false, stream, mr); @@ -208,12 +207,8 @@ std::unique_ptr scatter_gather_based_if_else(cudf::scalar const& lhs, static_cast(scatter_map_size), scatter_map.begin()}; - auto result = cudf::detail::scatter(scatter_source, - scatter_map_column_view, - table_view{std::vector{rhs}}, - false, - stream, - mr); + auto result = cudf::detail::scatter( + scatter_source, scatter_map_column_view, table_view{std::vector{rhs}}, stream, mr); return std::move(result->release()[0]); } diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 79c27816009..63711a43c3b 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -285,7 +285,6 @@ struct column_scalar_scatterer_impl { std::unique_ptr
scatter(table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -307,13 +306,12 @@ std::unique_ptr
scatter(table_view const& source, // create index type normalizing iterator for the scatter_map auto map_begin = indexalator_factory::make_input_iterator(scatter_map); auto map_end = map_begin + scatter_map.size(); - return detail::scatter(source, map_begin, map_end, target, check_bounds, stream, mr); + return detail::scatter(source, map_begin, map_end, target, stream, mr); } std::unique_ptr
scatter(table_view const& source, device_span const scatter_map, table_view const& target, - bool check_bounds, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -322,13 +320,12 @@ std::unique_ptr
scatter(table_view const& source, auto map_col = column_view(data_type{type_to_id()}, static_cast(scatter_map.size()), scatter_map.data()); - return scatter(source, map_col, target, check_bounds, stream, mr); + return scatter(source, map_col, target, stream, mr); } std::unique_ptr
scatter(std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -340,20 +337,9 @@ std::unique_ptr
scatter(std::vector> // Create normalizing iterator for indices column auto map_begin = indexalator_factory::make_input_iterator(indices); - auto map_end = map_begin + indices.size(); // Optionally check map index values are within the number of target rows. auto const n_rows = target.num_rows(); - if (check_bounds) { - CUDF_EXPECTS( - indices.size() == thrust::count_if(rmm::exec_policy(stream), - map_begin, - map_end, - [n_rows] __device__(size_type index) { - return ((index >= -n_rows) && (index < n_rows)); - }), - "Scatter map index out of bounds"); - } // Transform negative indices to index + target size auto scatter_rows = indices.size(); @@ -404,12 +390,8 @@ std::unique_ptr boolean_mask_scatter(column_view const& input, // The scatter map is actually a table with only one column, which is scatter map. auto scatter_map = detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream); - auto output_table = detail::scatter(table_view{{input}}, - scatter_map->get_column(0).view(), - table_view{{target}}, - false, - stream, - mr); + auto output_table = detail::scatter( + table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr); // There is only one column in output_table return std::make_unique(std::move(output_table->get_column(0))); @@ -505,21 +487,19 @@ std::unique_ptr
boolean_mask_scatter( std::unique_ptr
scatter(table_view const& source, column_view const& scatter_map, table_view const& target, - bool check_bounds, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::scatter(source, scatter_map, target, check_bounds, cudf::default_stream_value, mr); + return detail::scatter(source, scatter_map, target, cudf::default_stream_value, mr); } std::unique_ptr
scatter(std::vector> const& source, column_view const& indices, table_view const& target, - bool check_bounds, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::scatter(source, indices, target, check_bounds, cudf::default_stream_value, mr); + return detail::scatter(source, indices, target, cudf::default_stream_value, mr); } std::unique_ptr
boolean_mask_scatter(table_view const& input, diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu index 90c644933ec..b2587e67350 100644 --- a/cpp/src/filling/repeat.cu +++ b/cpp/src/filling/repeat.cu @@ -103,7 +103,6 @@ namespace cudf { namespace detail { std::unique_ptr
repeat(table_view const& input_table, column_view const& count, - bool check_count, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -112,19 +111,12 @@ std::unique_ptr
repeat(table_view const& input_table, if (input_table.num_rows() == 0) { return cudf::empty_like(input_table); } - if (check_count) { cudf::type_dispatcher(count.type(), count_checker{count}, stream); } - auto count_iter = cudf::detail::indexalator_factory::make_input_iterator(count); rmm::device_uvector offsets(count.size(), stream); thrust::inclusive_scan( rmm::exec_policy(stream), count_iter, count_iter + count.size(), offsets.begin()); - if (check_count) { - CUDF_EXPECTS(thrust::is_sorted(rmm::exec_policy(stream), offsets.begin(), offsets.end()), - "count has negative values or the resulting table has too many rows."); - } - size_type output_size{offsets.back_element(stream)}; rmm::device_uvector indices(output_size, stream); thrust::upper_bound(rmm::exec_policy(stream), @@ -162,11 +154,10 @@ std::unique_ptr
repeat(table_view const& input_table, std::unique_ptr
repeat(table_view const& input_table, column_view const& count, - bool check_count, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::repeat(input_table, count, check_count, cudf::default_stream_value, mr); + return detail::repeat(input_table, count, cudf::default_stream_value, mr); } std::unique_ptr
repeat(table_view const& input_table, diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 5d345273782..743ca5e8065 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -178,9 +178,9 @@ void scan_result_functor::operator()(aggregation const& agg) stream, mr); } - result = std::move(cudf::detail::scatter( - table_view{{*result}}, *gather_map, table_view{{*result}}, false, stream, mr) - ->release()[0]); + result = std::move( + cudf::detail::scatter(table_view{{*result}}, *gather_map, table_view{{*result}}, stream, mr) + ->release()[0]); if (rank_agg._null_handling == null_policy::EXCLUDE) { result->set_null_mask(cudf::detail::copy_bitmask(get_grouped_values(), stream, mr)); } diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index a0abaf71160..53ab65e9be7 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -244,7 +244,6 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre cudf::detail::scatter(table_view({group_labels_view}), scatter_map, table_view({temp_labels->view()}), - false, stream, rmm::mr::get_current_device_resource()); diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 3e0cc26dcdd..296a9f40fbb 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -610,7 +610,7 @@ std::pair, std::vector> hash_partition_table( // Use the resulting scatter map to materialize the output auto output = detail::scatter( - input, row_partition_numbers.begin(), row_partition_numbers.end(), input, false, stream, mr); + input, row_partition_numbers.begin(), row_partition_numbers.end(), input, stream, mr); stream.synchronize(); // Async D2H copy must finish before returning host vec return std::pair(std::move(output), std::move(partition_offsets)); @@ -698,7 +698,7 @@ struct dispatch_map_type { // Scatter the rows into their partitions auto scattered = - cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, false, stream, mr); + cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, stream, mr); return std::pair(std::move(scattered), std::move(partition_offsets)); } diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index a23786ec7f3..859ed7e5d53 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -198,7 +198,6 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, table_view{std::vector{gathered_defaults->release()[0]->view()}}, scatter_map, table_view{std::vector{output_with_nulls->release()[0]->view()}}, - false, stream, mr); return std::move(scattered_results->release()[0]); diff --git a/cpp/tests/copying/scatter_list_scalar_tests.cpp b/cpp/tests/copying/scatter_list_scalar_tests.cpp index 7d3de9b6c15..40b5dcff7b6 100644 --- a/cpp/tests/copying/scatter_list_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_list_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ std::unique_ptr single_scalar_scatter(column_view const& target, { std::vector> slrs{slr}; table_view targets{{target}}; - auto result = scatter(slrs, scatter_map, targets, true); + auto result = scatter(slrs, scatter_map, targets); return std::move(result->release()[0]); } diff --git a/cpp/tests/copying/scatter_struct_scalar_tests.cpp b/cpp/tests/copying/scatter_struct_scalar_tests.cpp index 44e65110f33..62201224893 100644 --- a/cpp/tests/copying/scatter_struct_scalar_tests.cpp +++ b/cpp/tests/copying/scatter_struct_scalar_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ TYPED_TEST_SUITE(TypedStructScalarScatterTest, FixedWidthTypes); column scatter_single_scalar(scalar const& slr, column_view scatter_map, column_view target) { - auto result = scatter({slr}, scatter_map, table_view{{target}}, false); + auto result = scatter({slr}, scatter_map, table_view{{target}}); return result->get_column(0); } diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp index 306ab8a3d5c..f853920e24c 100644 --- a/cpp/tests/copying/scatter_tests.cpp +++ b/cpp/tests/copying/scatter_tests.cpp @@ -39,7 +39,7 @@ TEST_F(ScatterUntypedTests, ScatterMapTooLong) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if scatter map has nulls @@ -54,7 +54,7 @@ TEST_F(ScatterUntypedTests, ScatterMapNulls) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if scatter map has nulls @@ -72,7 +72,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarMapNulls) auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } // Throw logic error if source and target have different number of columns @@ -87,7 +87,7 @@ TEST_F(ScatterUntypedTests, ScatterColumnNumberMismatch) auto const source_table = cudf::table_view({source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if number of scalars doesn't match number of columns @@ -105,7 +105,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarColumnNumberMismatch) auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } // Throw logic error if source and target have different data types @@ -120,7 +120,7 @@ TEST_F(ScatterUntypedTests, ScatterDataTypeMismatch) auto const source_table = cudf::table_view({source}); auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if source and target have different data types @@ -138,7 +138,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarDataTypeMismatch) auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } template @@ -148,43 +148,6 @@ class ScatterIndexTypeTests : public cudf::test::BaseFixture { using IndexTypes = cudf::test::Types; TYPED_TEST_SUITE(ScatterIndexTypeTests, IndexTypes); -// Throw logic error if check_bounds is set and index is out of bounds -TYPED_TEST(ScatterIndexTypeTests, ScatterOutOfBounds) -{ - using cudf::test::fixed_width_column_wrapper; - - fixed_width_column_wrapper source({1, 2, 3, 4, 5, 6}); - fixed_width_column_wrapper target({10, 20, 30, 40, 50, 60, 70, 80}); - fixed_width_column_wrapper upper_bound({-3, 3, 1, 8}); - fixed_width_column_wrapper lower_bound({-3, 3, 1, -9}); - - auto const source_table = cudf::table_view({source, source}); - auto const target_table = cudf::table_view({target, target}); - - EXPECT_THROW(cudf::scatter(source_table, upper_bound, target_table, true), cudf::logic_error); - EXPECT_THROW(cudf::scatter(source_table, lower_bound, target_table, true), cudf::logic_error); -} - -// Throw logic error if check_bounds is set and index is out of bounds -TYPED_TEST(ScatterIndexTypeTests, ScatterScalarOutOfBounds) -{ - using cudf::scalar_type_t; - using cudf::test::fixed_width_column_wrapper; - - auto const source = scalar_type_t(100, true); - std::reference_wrapper slr_ref{source}; - std::vector> source_vector{slr_ref}; - - fixed_width_column_wrapper target({10, 20, 30, 40, 50, 60, 70, 80}); - fixed_width_column_wrapper upper_bound({-3, 3, 1, 8}); - fixed_width_column_wrapper lower_bound({-3, 3, 1, -9}); - - auto const target_table = cudf::table_view({target}); - - EXPECT_THROW(cudf::scatter(source_vector, upper_bound, target_table, true), cudf::logic_error); - EXPECT_THROW(cudf::scatter(source_vector, lower_bound, target_table, true), cudf::logic_error); -} - // Validate that each of the index types work TYPED_TEST(ScatterIndexTypeTests, ScatterIndexType) { @@ -199,7 +162,7 @@ TYPED_TEST(ScatterIndexTypeTests, ScatterIndexType) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -221,7 +184,7 @@ TYPED_TEST(ScatterIndexTypeTests, ScatterScalarIndexType) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -248,7 +211,7 @@ TYPED_TEST(ScatterInvalidIndexTypeTests, ScatterInvalidIndexType) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error); } // Throw logic error if scatter map column has invalid data type @@ -266,7 +229,7 @@ TYPED_TEST(ScatterInvalidIndexTypeTests, ScatterScalarInvalidIndexType) auto const target_table = cudf::table_view({target}); - EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error); + EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error); } template @@ -287,7 +250,7 @@ TYPED_TEST(ScatterDataTypeTests, EmptyScatterMap) auto const source_table = cudf::table_view({source, source}); auto const target_table = cudf::table_view({target, target}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); // Expect a copy of the input table CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), target_table); @@ -309,7 +272,7 @@ TYPED_TEST(ScatterDataTypeTests, EmptyScalarScatterMap) auto const target_table = cudf::table_view({target}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); // Expect a copy of the input table CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), target_table); @@ -328,7 +291,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterNoNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -348,7 +311,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterBothNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -367,7 +330,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterSourceNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -387,7 +350,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterTargetNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -409,7 +372,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarNoNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -433,7 +396,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarTargetNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -457,7 +420,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarSourceNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -482,7 +445,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarBothNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -510,7 +473,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterSourceNullsLarge) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -540,7 +503,7 @@ TEST_F(ScatterStringsTests, ScatterNoNulls) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -568,7 +531,7 @@ TEST_F(ScatterStringsTests, ScatterScalarNoNulls) auto const target_table = cudf::table_view({target}); auto const expected_table = cudf::table_view({expected}); - auto const result = cudf::scatter(source_vector, scatter_map, target_table, true); + auto const result = cudf::scatter(source_vector, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } @@ -937,7 +900,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointScatter) auto const target_table = cudf::table_view({target, target}); auto const expected_table = cudf::table_view({expected, expected}); - auto const result = cudf::scatter(source_table, scatter_map, target_table, true); + auto const result = cudf::scatter(source_table, scatter_map, target_table); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table, result->view()); } diff --git a/cpp/tests/filling/repeat_tests.cpp b/cpp/tests/filling/repeat_tests.cpp index 7d30298b1bd..df8dceb0f8d 100644 --- a/cpp/tests/filling/repeat_tests.cpp +++ b/cpp/tests/filling/repeat_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -272,24 +272,3 @@ TEST_F(RepeatErrorTestFixture, CountHasNulls) // input_table.has_nulls() == true EXPECT_THROW(auto ret = cudf::repeat(input_table, count), cudf::logic_error); } - -TEST_F(RepeatErrorTestFixture, NegativeCountOrOverflow) -{ - auto input = cudf::test::fixed_width_column_wrapper( - thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + 100); - - auto count_neg = cudf::test::fixed_width_column_wrapper( - thrust::make_constant_iterator(-1, 0), thrust::make_constant_iterator(-1, 100)); - - auto value = std::numeric_limits::max() / 10; - auto count_overflow = cudf::test::fixed_width_column_wrapper( - thrust::make_constant_iterator(value, 0), thrust::make_constant_iterator(value, 100)); - - cudf::table_view input_table{{input}}; - - // negative - EXPECT_THROW(auto p_ret = cudf::repeat(input_table, count_neg, true), cudf::logic_error); - - // overflow - EXPECT_THROW(auto p_ret = cudf::repeat(input_table, count_overflow, true), cudf::logic_error); -} diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index dbc2a28c38c..c6f606e971d 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -706,10 +706,10 @@ private static native long[] dropDuplicates(long nativeHandle, int[] keyColumns, private static native long[] gather(long tableHandle, long gatherView, boolean checkBounds); private static native long[] scatterTable(long srcTableHandle, long scatterView, - long targetTableHandle, boolean checkBounds) + long targetTableHandle) throws CudfException; private static native long[] scatterScalars(long[] srcScalarHandles, long scatterView, - long targetTableHandle, boolean checkBounds) + long targetTableHandle) throws CudfException; private static native long[] convertToRows(long nativeHandle); @@ -723,8 +723,7 @@ private static native long[] scatterScalars(long[] srcScalarHandles, long scatte private static native long[] repeatStaticCount(long tableHandle, int count); private static native long[] repeatColumnCount(long tableHandle, - long columnHandle, - boolean checkCount); + long columnHandle); private static native long rowBitCount(long tableHandle) throws CudfException; @@ -1686,22 +1685,7 @@ public Table repeat(int count) { * @throws CudfException on any error. */ public Table repeat(ColumnView counts) { - return repeat(counts, true); - } - - /** - * Create a new table by repeating each row of this table. The number of - * repetitions of each row is defined by the corresponding value in counts. - * @param counts the number of times to repeat each row. Cannot have nulls, must be an - * Integer type, and must have one entry for each row in the table. - * @param checkCount should counts be checked for errors before processing. Be careful if you - * disable this because if you pass in bad data you might just get back an - * empty table or bad data. - * @return the new Table. - * @throws CudfException on any error. - */ - public Table repeat(ColumnView counts, boolean checkCount) { - return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView(), checkCount)); + return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView())); } /** @@ -2349,14 +2333,11 @@ public Table gather(ColumnView gatherMap, OutOfBoundsPolicy outOfBoundsPolicy) { * * @param scatterMap The map of indexes. Must be non-nullable and integral type. * @param target The table into which rows from the current table are to be scattered out-of-place. - * @param checkBounds Optionally perform bounds checking on the values of`scatterMap` and throw - * an exception if any of its values are out of bounds. * @return A new table which is the result of out-of-place scattering the source table into the * target table. */ - public Table scatter(ColumnView scatterMap, Table target, boolean checkBounds) { - return new Table(scatterTable(nativeHandle, scatterMap.getNativeView(), target.getNativeView(), - checkBounds)); + public Table scatter(ColumnView scatterMap, Table target) { + return new Table(scatterTable(nativeHandle, scatterMap.getNativeView(), target.getNativeView())); } /** @@ -2376,20 +2357,17 @@ public Table scatter(ColumnView scatterMap, Table target, boolean checkBounds) { * @param source The input scalars containing values to be scattered into the target table. * @param scatterMap The map of indexes. Must be non-nullable and integral type. * @param target The table into which the values from source are to be scattered out-of-place. - * @param checkBounds Optionally perform bounds checking on the values of`scatterMap` and throw - * an exception if any of its values are out of bounds. * @return A new table which is the result of out-of-place scattering the source values into the * target table. */ - public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target, - boolean checkBounds) { + public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target) { long[] srcScalarHandles = new long[source.length]; for(int i = 0; i < source.length; ++i) { assert source[i] != null : "Scalar vectors passed in should not contain null"; srcScalarHandles[i] = source[i].getScalarHandle(); } return new Table(scatterScalars(srcScalarHandles, scatterMap.getNativeView(), - target.getNativeView(), checkBounds)); + target.getNativeView())); } private static GatherMap[] buildJoinGatherMaps(long[] gatherMapData) { diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index c23c5a3ccb2..cbd0aee335e 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2979,8 +2979,7 @@ Java_ai_rapids_cudf_Table_convertToRowsFixedWidthOptimized(JNIEnv *env, jclass, JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterTable(JNIEnv *env, jclass, jlong j_input, jlong j_map, - jlong j_target, - jboolean check_bounds) { + jlong j_target) { JNI_NULL_CHECK(env, j_input, "input table is null", 0); JNI_NULL_CHECK(env, j_map, "map column is null", 0); JNI_NULL_CHECK(env, j_target, "target table is null", 0); @@ -2989,15 +2988,14 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterTable(JNIEnv *env, auto const input = reinterpret_cast(j_input); auto const map = reinterpret_cast(j_map); auto const target = reinterpret_cast(j_target); - return convert_table_for_return(env, cudf::scatter(*input, *map, *target, check_bounds)); + return convert_table_for_return(env, cudf::scatter(*input, *map, *target)); } CATCH_STD(env, 0); } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterScalars(JNIEnv *env, jclass, jlongArray j_input, - jlong j_map, jlong j_target, - jboolean check_bounds) { + jlong j_map, jlong j_target) { JNI_NULL_CHECK(env, j_input, "input scalars array is null", 0); JNI_NULL_CHECK(env, j_map, "map column is null", 0); JNI_NULL_CHECK(env, j_target, "target table is null", 0); @@ -3009,7 +3007,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterScalars(JNIEnv *en [](auto &scalar) { return std::ref(*scalar); }); auto const map = reinterpret_cast(j_map); auto const target = reinterpret_cast(j_target); - return convert_table_for_return(env, cudf::scatter(input, *map, *target, check_bounds)); + return convert_table_for_return(env, cudf::scatter(input, *map, *target)); } CATCH_STD(env, 0); } @@ -3094,15 +3092,14 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_repeatStaticCount(JNIEnv JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_repeatColumnCount(JNIEnv *env, jclass, jlong input_jtable, - jlong count_jcol, - jboolean check_count) { + jlong count_jcol) { JNI_NULL_CHECK(env, input_jtable, "input table is null", 0); JNI_NULL_CHECK(env, count_jcol, "count column is null", 0); try { cudf::jni::auto_set_device(env); auto const input = reinterpret_cast(input_jtable); auto const count = reinterpret_cast(count_jcol); - return convert_table_for_return(env, cudf::repeat(*input, *count, check_count)); + return convert_table_for_return(env, cudf::repeat(*input, *count)); } CATCH_STD(env, 0); } diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index f31da054091..f564a55463b 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -2689,17 +2689,6 @@ void testRepeatColumn() { } } - @Test - void testRepeatColumnBad() { - try (Table t = new Table.TestBuilder() - .column(1, 2) - .column("a", "b") - .build(); - ColumnVector repeats = ColumnVector.fromBytes((byte)2, (byte)-1)) { - assertThrows(CudfException.class, () -> t.repeat(repeats)); - } - } - @Test void testInterleaveIntColumns() { try (Table t = new Table.TestBuilder() @@ -6963,7 +6952,7 @@ void testScatterTable() { .decimal32Column(-3, 1, -2, 2, 4, 3) .decimal64Column(-8, 100001L, -200002L, 200002L, 400004L, 300003L) .build(); - Table result = srcTable.scatter(scatterMap, targetTable, false)) { + Table result = srcTable.scatter(scatterMap, targetTable)) { assertTablesAreEqual(expected, result); } } @@ -6981,7 +6970,7 @@ void testScatterScalars() { .column(0, -2, 0, -4, 0) .column("A", "BB", "A", "BBBB", "A") .build(); - Table result = Table.scatter(new Scalar[] { s1, s2 }, scatterMap, targetTable, false)) { + Table result = Table.scatter(new Scalar[] { s1, s2 }, scatterMap, targetTable)) { assertTablesAreEqual(expected, result); } } diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index f1183e008f8..a9cfbbbe223 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -194,8 +194,7 @@ def gather( cdef scatter_scalar(list source_device_slrs, column_view scatter_map, - table_view target_table, - bool bounds_check): + table_view target_table): cdef vector[reference_wrapper[constscalar]] c_source cdef DeviceScalar d_slr cdef unique_ptr[table] c_result @@ -212,7 +211,6 @@ cdef scatter_scalar(list source_device_slrs, c_source, scatter_map, target_table, - bounds_check ) ) @@ -221,8 +219,7 @@ cdef scatter_scalar(list source_device_slrs, cdef scatter_column(list source_columns, column_view scatter_map, - table_view target_table, - bool bounds_check): + table_view target_table): cdef table_view c_source = table_view_from_columns(source_columns) cdef unique_ptr[table] c_result @@ -232,7 +229,6 @@ cdef scatter_column(list source_columns, c_source, scatter_map, target_table, - bounds_check ) ) return columns_from_unique_ptr(move(c_result)) @@ -257,14 +253,24 @@ def scatter(list sources, Column scatter_map, list target_columns, cdef column_view scatter_map_view = scatter_map.view() cdef table_view target_table_view = table_view_from_columns(target_columns) + if bounds_check: + n_rows = len(target_columns[0]) + if not ( + (scatter_map >= -n_rows).all() + and (scatter_map < n_rows).all() + ): + raise IndexError( + f"index out of bounds for column of size {n_rows}" + ) + if isinstance(sources[0], Column): return scatter_column( - sources, scatter_map_view, target_table_view, bounds_check + sources, scatter_map_view, target_table_view ) else: source_scalars = [as_device_scalar(slr) for slr in sources] return scatter_scalar( - source_scalars, scatter_map_view, target_table_view, bounds_check + source_scalars, scatter_map_view, target_table_view ) diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index a1c433774b5..bc89d364004 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -44,14 +44,12 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: table_view source_table, column_view scatter_map, table_view target_table, - bool bounds_check ) except + cdef unique_ptr[table] scatter ( vector[reference_wrapper[constscalar]] source_scalars, column_view indices, table_view target, - bool bounds_check ) except + ctypedef enum mask_allocation_policy: diff --git a/python/cudf/cudf/_lib/cpp/filling.pxd b/python/cudf/cudf/_lib/cpp/filling.pxd index 4233ab60ff2..e412f294537 100644 --- a/python/cudf/cudf/_lib/cpp/filling.pxd +++ b/python/cudf/cudf/_lib/cpp/filling.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -29,7 +29,6 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: cdef unique_ptr[table] repeat( const table_view & input, const column_view & count, - bool check_count ) except + cdef unique_ptr[table] repeat( diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx index 7de63def6a6..592d56158a1 100644 --- a/python/cudf/cudf/_lib/filling.pyx +++ b/python/cudf/cudf/_lib/filling.pyx @@ -54,24 +54,23 @@ def fill(Column destination, int begin, int end, DeviceScalar value): return Column.from_unique_ptr(move(c_result)) -def repeat(list inp, object count, bool check_count=False): +def repeat(list inp, object count): if isinstance(count, Column): - return _repeat_via_column(inp, count, check_count) + return _repeat_via_column(inp, count) else: return _repeat_via_size_type(inp, count) -def _repeat_via_column(list inp, Column count, bool check_count): +def _repeat_via_column(list inp, Column count): cdef table_view c_inp = table_view_from_columns(inp) cdef column_view c_count = count.view() - cdef bool c_check_count = check_count + cdef bool c_check_count = False cdef unique_ptr[table] c_result with nogil: c_result = move(cpp_filling.repeat( c_inp, c_count, - c_check_count )) return columns_from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 66ae984ee81..7291b695312 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -571,21 +571,14 @@ def _scatter_by_column( self._check_scatter_key_length(num_keys, value) - try: - if is_bool_dtype(key.dtype): - return libcudf.copying.boolean_mask_scatter( - [value], [self], key - )[0]._with_type_metadata(self.dtype) - else: - return libcudf.copying.scatter([value], key, [self])[ - 0 - ]._with_type_metadata(self.dtype) - except RuntimeError as e: - if "out of bounds" in str(e): - raise IndexError( - f"index out of bounds for column of size {len(self)}" - ) from e - raise + if is_bool_dtype(key.dtype): + return libcudf.copying.boolean_mask_scatter([value], [self], key)[ + 0 + ]._with_type_metadata(self.dtype) + else: + return libcudf.copying.scatter([value], key, [self])[ + 0 + ]._with_type_metadata(self.dtype) def _check_scatter_key_length( self, num_keys: int, value: Union[cudf.core.scalar.Scalar, ColumnBase]