From 57ac8c449fb828ef575c53c9a2c63ec87ebd6367 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 27 Jan 2022 08:31:39 -0500 Subject: [PATCH 1/3] Add cudf::strings::findall_record API (#9911) Reference #9856 specifically https://github.com/rapidsai/cudf/issues/9856#issuecomment-991395561 Adds `cudf::strings::findall_record` which was initially implemented in nvstrings but not ported over since LIST column types did not exist at the time and returning a vector of small columns was very inefficient. This API should also allow using the current python function `cudf.str.findall()` with the `expand=False` parameter more effectively. A follow-on PR will address these python changes. This PR reorganizes the libcudf strings _find_ source files into the `cpp/src/strings/search` subdirectory as well. Also, `findall()` has only a regex version so the `_re` suffix is dropped from the name in the libcudf implementation. The python changes in this PR address only the name change and the addition of the new API in the cython interface. Depends on #9909 -- shares the `cudf::strings::detail::count_matches()` utility function. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/9911 --- cpp/CMakeLists.txt | 8 +- cpp/benchmarks/string/contains.cpp | 2 +- cpp/include/cudf/strings/findall.hpp | 34 +++- .../{count_matches.cuh => count_matches.cu} | 20 +- cpp/src/strings/count_matches.hpp | 50 +++++ cpp/src/strings/extract/extract_all.cu | 4 +- cpp/src/strings/{ => search}/find.cu | 0 cpp/src/strings/{ => search}/find_multiple.cu | 0 cpp/src/strings/{ => search}/findall.cu | 14 +- cpp/src/strings/search/findall_record.cu | 171 ++++++++++++++++++ cpp/tests/strings/findall_tests.cpp | 36 +++- python/cudf/cudf/_lib/cpp/strings/findall.pxd | 12 +- python/cudf/cudf/_lib/strings/findall.pyx | 28 ++- 13 files changed, 340 insertions(+), 39 deletions(-) rename cpp/src/strings/{count_matches.cuh => count_matches.cu} (87%) create mode 100644 cpp/src/strings/count_matches.hpp rename cpp/src/strings/{ => search}/find.cu (100%) rename cpp/src/strings/{ => search}/find_multiple.cu (100%) rename cpp/src/strings/{ => search}/findall.cu (95%) create mode 100644 cpp/src/strings/search/findall_record.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e4637408110..90e94ffcc7b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -425,13 +425,11 @@ add_library( src/strings/copying/concatenate.cu src/strings/copying/copying.cu src/strings/copying/shift.cu + src/strings/count_matches.cu src/strings/extract/extract.cu src/strings/extract/extract_all.cu src/strings/filling/fill.cu src/strings/filter_chars.cu - src/strings/findall.cu - src/strings/find.cu - src/strings/find_multiple.cu src/strings/padding.cu src/strings/json/json_path.cu src/strings/regex/regcomp.cpp @@ -441,6 +439,10 @@ add_library( src/strings/replace/multi_re.cu src/strings/replace/replace.cu src/strings/replace/replace_re.cu + src/strings/search/findall.cu + 
src/strings/search/findall_record.cu + src/strings/search/find.cu + src/strings/search/find_multiple.cu src/strings/split/partition.cu src/strings/split/split.cu src/strings/split/split_record.cu diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index 980b353406a..fbcfabb4532 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -46,7 +46,7 @@ static void BM_contains(benchmark::State& state, contains_type ct) cudf::strings::count_re(input, "\\d+"); break; case contains_type::findall: // returns occurrences of matches - cudf::strings::findall_re(input, "\\d+"); + cudf::strings::findall(input, "\\d+"); break; } } diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index 6c3139747af..4207cddbafb 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,7 +52,37 @@ namespace strings { * @param mr Device memory resource used to allocate the returned table's device memory. * @return New table of strings columns. */ -std::unique_ptr findall_re( +std::unique_ptr
findall( + strings_column_view const& strings, + std::string const& pattern, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Returns a lists column of strings for each matching occurrence of the + * regex pattern within each string. + * + * @code{.pseudo} + * Example: + * s = ["bunny", "rabbit", "hare", "dog"] + * r = findall_record(s, "[ab]") + * r is now a lists column like: + * [ ["b"] + * ["a","b","b"] + * ["a"] + * null ] + * @endcode + * + * A null output row results if the pattern is not found in the corresponding row + * input string. + * + * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. + * + * @param strings Strings instance for this operation. + * @param pattern Regex pattern to match within each string. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New lists column of strings. + */ +std::unique_ptr<column> findall_record( strings_column_view const& strings, std::string const& pattern, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/strings/count_matches.cuh b/cpp/src/strings/count_matches.cu similarity index 87% rename from cpp/src/strings/count_matches.cuh rename to cpp/src/strings/count_matches.cu index c14142f4779..d0a6825666b 100644 --- a/cpp/src/strings/count_matches.cuh +++ b/cpp/src/strings/count_matches.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,16 +14,13 @@ * limitations under the License. 
*/ -#pragma once - +#include #include -#include #include #include #include -#include #include #include @@ -32,6 +29,7 @@ namespace cudf { namespace strings { namespace detail { +namespace { /** * @brief Functor counts the total matches to the given regex in each string. */ @@ -50,12 +48,13 @@ struct count_matches_fn { int32_t end = d_str.length(); while ((begin < end) && (prog.find(idx, d_str, begin, end) > 0)) { ++count; - begin = end; + begin = end + (begin == end); end = d_str.length(); } return count; } }; +} // namespace /** * @brief Returns a column of regex match counts for each string in the given column. @@ -67,11 +66,10 @@ struct count_matches_fn { * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -std::unique_ptr count_matches( - column_device_view const& d_strings, - reprog_device const& d_prog, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr count_matches(column_device_view const& d_strings, + reprog_device const& d_prog, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // Create output column auto counts = make_numeric_column( diff --git a/cpp/src/strings/count_matches.hpp b/cpp/src/strings/count_matches.hpp new file mode 100644 index 00000000000..1339f2b1ebd --- /dev/null +++ b/cpp/src/strings/count_matches.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf { + +class column_device_view; + +namespace strings { +namespace detail { + +class reprog_device; + +/** + * @brief Returns a column of regex match counts for each string in the given column. + * + * A null entry will result in a zero count for that output row. + * + * @param d_strings Device view of the input strings column. + * @param d_prog Regex instance to evaluate on each string. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + */ +std::unique_ptr count_matches( + column_device_view const& d_strings, + reprog_device const& d_prog, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index 584741298c2..c4749eae003 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include +#include #include #include diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/search/find.cu similarity index 100% rename from cpp/src/strings/find.cu rename to cpp/src/strings/search/find.cu diff --git a/cpp/src/strings/find_multiple.cu b/cpp/src/strings/search/find_multiple.cu similarity index 100% rename from cpp/src/strings/find_multiple.cu rename to cpp/src/strings/search/find_multiple.cu diff --git a/cpp/src/strings/findall.cu b/cpp/src/strings/search/findall.cu similarity index 95% rename from cpp/src/strings/findall.cu rename to cpp/src/strings/search/findall.cu index c82ab4f81c3..8fb754848d4 100644 --- a/cpp/src/strings/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -109,11 +109,11 @@ struct findall_count_fn : public findall_fn { } // namespace // -std::unique_ptr
findall_re( +std::unique_ptr
findall( strings_column_view const& strings, std::string const& pattern, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto const strings_count = strings.size(); auto const d_strings = column_device_view::create(strings.parent(), stream); @@ -205,12 +205,12 @@ std::unique_ptr
findall_re( // external API -std::unique_ptr
findall_re(strings_column_view const& strings, - std::string const& pattern, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
findall(strings_column_view const& strings, + std::string const& pattern, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::findall_re(strings, pattern, mr); + return detail::findall(strings, pattern, rmm::cuda_stream_default, mr); } } // namespace strings diff --git a/cpp/src/strings/search/findall_record.cu b/cpp/src/strings/search/findall_record.cu new file mode 100644 index 00000000000..9ffdb33f5f2 --- /dev/null +++ b/cpp/src/strings/search/findall_record.cu @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace strings { +namespace detail { + +using string_index_pair = thrust::pair; + +namespace { + +/** + * @brief This functor handles extracting matched strings by applying the compiled regex pattern + * and creating string_index_pairs for all the substrings. 
+ */ +template +struct findall_fn { + column_device_view const d_strings; + reprog_device prog; + offset_type const* d_offsets; + string_index_pair* d_indices; + + __device__ void operator()(size_type const idx) + { + if (d_strings.is_null(idx)) { return; } + auto const d_str = d_strings.element(idx); + + auto d_output = d_indices + d_offsets[idx]; + size_type output_idx = 0; + + int32_t begin = 0; + int32_t end = d_str.length(); + while ((begin < end) && (prog.find(idx, d_str, begin, end) > 0)) { + auto const spos = d_str.byte_offset(begin); // convert + auto const epos = d_str.byte_offset(end); // to bytes + + d_output[output_idx++] = string_index_pair{d_str.data() + spos, (epos - spos)}; + + begin = end + (begin == end); + end = d_str.length(); + } + } +}; + +} // namespace + +// +std::unique_ptr findall_record( + strings_column_view const& strings, + std::string const& pattern, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto const strings_count = strings.size(); + auto const d_strings = column_device_view::create(strings.parent(), stream); + + // compile regex into device object + auto const d_prog = + reprog_device::create(pattern, get_character_flags_table(), strings_count, stream); + + // Create lists offsets column + auto offsets = count_matches(*d_strings, *d_prog, stream, mr); + auto d_offsets = offsets->mutable_view().data(); + + // Compute null output rows + auto [null_mask, null_count] = cudf::detail::valid_if( + d_offsets, + d_offsets + strings_count, + [] __device__(auto const v) { return v > 0; }, + stream, + mr); + + auto const valid_count = strings_count - null_count; + // Return an empty lists column if there are no valid rows + if (valid_count == 0) { + return make_lists_column(0, + make_empty_column(type_to_id()), + make_empty_column(type_id::STRING), + 0, + rmm::device_buffer{}, + stream, + mr); + } + + // Convert counts into offsets + thrust::exclusive_scan( + 
rmm::exec_policy(stream), d_offsets, d_offsets + strings_count + 1, d_offsets); + + // Create indices vector with the total number of groups that will be extracted + auto total_matches = cudf::detail::get_value(offsets->view(), strings_count, stream); + + rmm::device_uvector indices(total_matches, stream); + auto d_indices = indices.data(); + auto begin = thrust::make_counting_iterator(0); + + // Build the string indices + auto const regex_insts = d_prog->insts_counts(); + if (regex_insts <= RX_SMALL_INSTS) { + findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; + thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); + } else if (regex_insts <= RX_MEDIUM_INSTS) { + findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; + thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); + } else if (regex_insts <= RX_LARGE_INSTS) { + findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; + thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); + } else { + findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; + thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); + } + + // Build the child strings column from the resulting indices + auto strings_output = make_strings_column(indices.begin(), indices.end(), stream, mr); + + // Build the lists column from the offsets and the strings + return make_lists_column(strings_count, + std::move(offsets), + std::move(strings_output), + null_count, + std::move(null_mask), + stream, + mr); +} + +} // namespace detail + +// external API + +std::unique_ptr findall_record(strings_column_view const& strings, + std::string const& pattern, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::findall_record(strings, pattern, rmm::cuda_stream_default, mr); +} + +} // namespace strings +} // namespace cudf diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index d7bf162d36f..4b1305a870a 100644 --- 
a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,7 +56,7 @@ TEST_F(StringsFindallTests, FindallTest) nullptr}; std::string pattern = "(\\w+)"; - auto results = cudf::strings::findall_re(strings_view, pattern); + auto results = cudf::strings::findall(strings_view, pattern); EXPECT_TRUE(results->num_columns() == 2); cudf::test::strings_column_wrapper expected1( @@ -75,6 +75,28 @@ TEST_F(StringsFindallTests, FindallTest) CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected); } +TEST_F(StringsFindallTests, FindallRecord) +{ + cudf::test::strings_column_wrapper input( + {"3-A", "4-May 5-Day 6-Hay", "12-Dec-2021-Jan", "Feb-March", "4 ABC", "", "", "25-9000-Hal"}, + {1, 1, 1, 1, 1, 0, 1, 1}); + + auto results = cudf::strings::findall_record(cudf::strings_column_view(input), "(\\d+)-(\\w+)"); + + bool valids[] = {1, 1, 1, 0, 0, 0, 0, 1}; + using LCW = cudf::test::lists_column_wrapper; + LCW expected({LCW{"3-A"}, + LCW{"4-May", "5-Day", "6-Hay"}, + LCW{"12-Dec", "2021-Jan"}, + LCW{}, + LCW{}, + LCW{}, + LCW{}, + LCW{"25-9000"}}, + valids); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); +} + TEST_F(StringsFindallTests, MediumRegex) { // This results in 15 regex instructions and falls in the 'medium' range. 
@@ -87,7 +109,7 @@ TEST_F(StringsFindallTests, MediumRegex) thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto strings_view = cudf::strings_column_view(strings); - auto results = cudf::strings::findall_re(strings_view, medium_regex); + auto results = cudf::strings::findall(strings_view, medium_regex); EXPECT_TRUE(results->num_columns() == 2); std::vector h_expected1{"first words 1234", nullptr}; @@ -115,9 +137,11 @@ TEST_F(StringsFindallTests, LargeRegex) std::vector h_strings{ "hello @abc @def world The quick brown @fox jumps over the lazy @dog hello " "http://www.world.com I'm here @home zzzz", - "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234" + "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012" + "34" "5678901234567890", - "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnop" + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmn" + "op" "qrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), @@ -125,7 +149,7 @@ TEST_F(StringsFindallTests, LargeRegex) thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto strings_view = cudf::strings_column_view(strings); - auto results = cudf::strings::findall_re(strings_view, large_regex); + auto results = cudf::strings::findall(strings_view, large_regex); EXPECT_TRUE(results->num_columns() == 1); std::vector h_expected{large_regex.c_str(), nullptr, nullptr}; diff --git a/python/cudf/cudf/_lib/cpp/strings/findall.pxd b/python/cudf/cudf/_lib/cpp/strings/findall.pxd index 189d0770b81..5533467d72a 100644 --- a/python/cudf/cudf/_lib/cpp/strings/findall.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/findall.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. 
+# Copyright (c) 2019-2022, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -10,6 +10,10 @@ from cudf._lib.cpp.table.table cimport table cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil: - cdef unique_ptr[table] findall_re( - column_view source_strings, - string pattern) except + + cdef unique_ptr[table] findall( + const column_view& source_strings, + const string& pattern) except + + + cdef unique_ptr[column] findall_record( + const column_view& source_strings, + const string& pattern) except + diff --git a/python/cudf/cudf/_lib/strings/findall.pyx b/python/cudf/cudf/_lib/strings/findall.pyx index 80af18e7c71..b17988018a6 100644 --- a/python/cudf/cudf/_lib/strings/findall.pyx +++ b/python/cudf/cudf/_lib/strings/findall.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -8,7 +8,10 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.scalar.scalar cimport string_scalar -from cudf._lib.cpp.strings.findall cimport findall_re as cpp_findall_re +from cudf._lib.cpp.strings.findall cimport ( + findall as cpp_findall, + findall_record as cpp_findall_record, +) from cudf._lib.cpp.table.table cimport table from cudf._lib.scalar cimport DeviceScalar from cudf._lib.utils cimport data_from_unique_ptr @@ -25,7 +28,7 @@ def findall(Column source_strings, pattern): cdef string pattern_string = str(pattern).encode() with nogil: - c_result = move(cpp_findall_re( + c_result = move(cpp_findall( source_view, pattern_string )) @@ -34,3 +37,22 @@ def findall(Column source_strings, pattern): move(c_result), column_names=range(0, c_result.get()[0].num_columns()) ) + + +def findall_record(Column source_strings, pattern): + """ + Returns data with all non-overlapping matches 
of `pattern` + in each string of `source_strings` as a lists column. + """ + cdef unique_ptr[column] c_result + cdef column_view source_view = source_strings.view() + + cdef string pattern_string = str(pattern).encode() + + with nogil: + c_result = move(cpp_findall_record( + source_view, + pattern_string + )) + + return Column.from_unique_ptr(move(c_result)) From cfcb3acb4431fb1bd38d632ad2a3d5ae233fe177 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 27 Jan 2022 11:12:25 -0500 Subject: [PATCH 2/3] Always upload cudf packages (#10147) Always upload all cudf packages Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Jordan Jacobelli (https://github.com/Ethyling) --- ci/cpu/prebuild.sh | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index 8a2c9d9be7c..1699fc16a47 100755 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -3,32 +3,11 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
set -e -DEFAULT_CUDA_VER="11.5" -DEFAULT_PYTHON_VER="3.8" - -#Always upload cudf Python package +#Always upload cudf packages export UPLOAD_CUDF=1 - -#Upload libcudf once per CUDA -if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]]; then - export UPLOAD_LIBCUDF=1 -else - export UPLOAD_LIBCUDF=0 -fi - -# upload cudf_kafka for all versions of Python -if [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then - export UPLOAD_CUDF_KAFKA=1 -else - export UPLOAD_CUDF_KAFKA=0 -fi - -#We only want to upload libcudf_kafka once per python/CUDA combo -if [[ "$PYTHON" == "${DEFAULT_PYTHON_VER}" ]] && [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then - export UPLOAD_LIBCUDF_KAFKA=1 -else - export UPLOAD_LIBCUDF_KAFKA=0 -fi +export UPLOAD_LIBCUDF=1 +export UPLOAD_CUDF_KAFKA=1 +export UPLOAD_LIBCUDF_KAFKA=1 if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then #If project flash is not activate, always build both From 5dd1c396a3e848f8709991b40a2aac16bf51eff7 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 28 Jan 2022 00:56:14 +0530 Subject: [PATCH 3/3] benchmark fixture - static object pointer fix (#10145) benchmark fixture - fix sharing static object pointer, replace with static shared pointer. 
Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/10145 --- cpp/benchmarks/fixture/benchmark_fixture.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index ca3a748ccad..5f23cbbafdd 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -32,8 +32,9 @@ inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource> inline auto make_pool_instance() { static rmm::mr::cuda_memory_resource cuda_mr; - static rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; - return std::shared_ptr(&pool_mr); + static auto pool_mr = + std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(&cuda_mr); + return pool_mr; } } // namespace