Skip to content

Commit

Permalink
Fix invalid-device-fn error in cudf::strings::replace_re with multipl…
Browse files Browse the repository at this point in the history
…e regex's (#7336)

Found errors when testing libcudf built with gcc-9 and nvcc 11.1.
```
STRINGS_TEST : StringsReplaceTests.ReplaceMultiRegexTest
MERGE_TEST : MergeStringTest/8.Merge2StringKeyColumns (with BOOL8 column types only)
PARTITIONING_TEST : RoundRobinTest/8.RoundRobin (tests with BOOL8 column types only)
```
The strings test cause an `invalid-device-function` error. The code for `cudf::strings::replace_re` that accepts multiple regex's was fixed to better manage the device memory holding the regex structures. The error occurred when cleaning up the temporary memory.

The other two tests failed because the iterator used for generating the expected data (in CPU code) was producing the wrong results. This may be a gcc optimization bug since adding a printf cleared the error. I was able to recode the iterator's functor so it would succeed on both gcc9 and gcc7.

Authors:
  - David (@davidwendt)

Approvers:
  - @nvdbaranec
  - Paul Taylor (@trxcllnt)

URL: #7336
  • Loading branch information
davidwendt authored Feb 10, 2021
1 parent efcd52d commit da3ab29
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 68 deletions.
44 changes: 24 additions & 20 deletions cpp/src/strings/replace/multi_re.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,8 +21,8 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/replace_re.hpp>
#include <cudf/strings/string_view.cuh>
Expand Down Expand Up @@ -97,13 +97,12 @@ struct replace_multi_regex_fn {
}
// all the ranges have been updated from each regex match;
// look for any that match at this character position (ch_pos)
auto itr = thrust::find_if(
thrust::seq, d_ranges, d_ranges + number_of_patterns, [ch_pos] __device__(auto range) {
auto itr =
thrust::find_if(thrust::seq, d_ranges, d_ranges + number_of_patterns, [ch_pos](auto range) {
return range.first == ch_pos;
});
if (itr !=
d_ranges +
number_of_patterns) { // match found, compute and replace the string in the output
if (itr != d_ranges + number_of_patterns) {
// match found, compute and replace the string in the output
size_type ptn_idx = static_cast<size_type>(itr - d_ranges);
size_type begin = d_ranges[ptn_idx].first;
size_type end = d_ranges[ptn_idx].second;
Expand Down Expand Up @@ -149,22 +148,27 @@ std::unique_ptr<column> replace_re(
auto repls_column = column_device_view::create(repls.parent(), stream);
auto d_repls = *repls_column;
auto d_flags = get_character_flags_table();

// compile regexes into device objects
size_type regex_insts = 0;
std::vector<std::unique_ptr<reprog_device, std::function<void(reprog_device*)>>> h_progs;
rmm::device_vector<reprog_device> progs;
thrust::host_vector<reprog_device> progs;
for (auto itr = patterns.begin(); itr != patterns.end(); ++itr) {
auto prog = reprog_device::create(*itr, d_flags, strings_count, stream);
auto insts = prog->insts_counts();
if (insts > regex_insts) regex_insts = insts;
auto prog = reprog_device::create(*itr, d_flags, strings_count, stream);
regex_insts = std::max(regex_insts, prog->insts_counts());
progs.push_back(*prog);
h_progs.emplace_back(std::move(prog));
}
auto d_progs = progs.data().get();

// copy null mask
auto null_mask = copy_bitmask(strings.parent());
auto null_count = strings.null_count();
// copy all the reprog_device instances to a device memory array
rmm::device_buffer progs_buffer{sizeof(reprog_device) * progs.size()};
CUDA_TRY(cudaMemcpyAsync(progs_buffer.data(),
progs.data(),
progs.size() * sizeof(reprog_device),
cudaMemcpyHostToDevice,
stream.value()));
reprog_device* d_progs = reinterpret_cast<reprog_device*>(progs_buffer.data());

// create working buffer for ranges pairs
rmm::device_vector<found_range> found_ranges(patterns.size() * strings_count);
auto d_found_ranges = found_ranges.data().get();
Expand All @@ -178,31 +182,31 @@ std::unique_ptr<column> replace_re(
replace_multi_regex_fn<RX_STACK_SMALL>{
d_strings, d_progs, static_cast<size_type>(progs.size()), d_found_ranges, d_repls},
strings_count,
null_count,
strings.null_count(),
stream,
mr);
else if (regex_insts <= RX_MEDIUM_INSTS)
children = make_strings_children(
replace_multi_regex_fn<RX_STACK_MEDIUM>{
d_strings, d_progs, static_cast<size_type>(progs.size()), d_found_ranges, d_repls},
strings_count,
null_count,
strings.null_count(),
stream,
mr);
else
children = make_strings_children(
replace_multi_regex_fn<RX_STACK_LARGE>{
d_strings, d_progs, static_cast<size_type>(progs.size()), d_found_ranges, d_repls},
strings_count,
null_count,
strings.null_count(),
stream,
mr);

return make_strings_column(strings_count,
std::move(children.first),
std::move(children.second),
null_count,
std::move(null_mask),
strings.null_count(),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down
21 changes: 9 additions & 12 deletions cpp/tests/merge/merge_string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,10 @@ TYPED_TEST(MergeStringTest, Merge2StringKeyColumns)
"hi",
"hj"});

auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return (row);
});
auto seq_out2 = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});
fixed_width_column_wrapper<TypeParam, typename decltype(seq_out2)::value_type> expectedDataWrap2(
seq_out2, seq_out2 + outputRows);

Expand Down Expand Up @@ -376,12 +374,11 @@ TYPED_TEST(MergeStringTest, Merge2StringKeyNullColumns)
"hj"},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0});

auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return (row);
});
auto seq_out2 = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});

fixed_width_column_wrapper<TypeParam, typename decltype(seq_out2)::value_type> expectedDataWrap2(
seq_out2, seq_out2 + outputRows);

Expand Down
60 changes: 24 additions & 36 deletions cpp/tests/partitioning/round_robin_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,10 @@ TYPED_TEST(RoundRobinTest, RoundRobinPartitions13_3)

cudf::size_type inputRows = static_cast<cudf::column_view const&>(rrColWrap1).size();

auto sequence_l = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return row;
});
auto sequence_l = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});

cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(sequence_l)::value_type>
rrColWrap2(sequence_l, sequence_l + inputRows);
Expand Down Expand Up @@ -191,12 +189,10 @@ TYPED_TEST(RoundRobinTest, RoundRobinPartitions11_3)

cudf::size_type inputRows = static_cast<cudf::column_view const&>(rrColWrap1).size();

auto sequence_l = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return row;
});
auto sequence_l = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});

cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(sequence_l)::value_type>
rrColWrap2(sequence_l, sequence_l + inputRows);
Expand Down Expand Up @@ -324,12 +320,10 @@ TYPED_TEST(RoundRobinTest, RoundRobinDegeneratePartitions11_15)

cudf::size_type inputRows = static_cast<cudf::column_view const&>(rrColWrap1).size();

auto sequence_l = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return row;
});
auto sequence_l = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});

cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(sequence_l)::value_type>
rrColWrap2(sequence_l, sequence_l + inputRows);
Expand Down Expand Up @@ -460,12 +454,10 @@ TYPED_TEST(RoundRobinTest, RoundRobinDegeneratePartitions11_11)

cudf::size_type inputRows = static_cast<cudf::column_view const&>(rrColWrap1).size();

auto sequence_l = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return row;
});
auto sequence_l = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});

cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(sequence_l)::value_type>
rrColWrap2(sequence_l, sequence_l + inputRows);
Expand Down Expand Up @@ -528,12 +520,10 @@ TYPED_TEST(RoundRobinTest, RoundRobinNPartitionsDivideNRows)

cudf::size_type inputRows = static_cast<cudf::column_view const&>(rrColWrap1).size();

auto sequence_l = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return row;
});
auto sequence_l = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});

cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(sequence_l)::value_type>
rrColWrap2(sequence_l, sequence_l + inputRows);
Expand Down Expand Up @@ -644,12 +634,10 @@ TYPED_TEST(RoundRobinTest, RoundRobinSinglePartition)

cudf::size_type inputRows = static_cast<cudf::column_view const&>(rrColWrap1).size();

auto sequence_l = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
return (row % 2 == 0) ? 1 : 0;
} else
return row;
});
auto sequence_l = cudf::detail::make_counting_transform_iterator(
0, [bool8 = (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)](auto row) {
return bool8 ? static_cast<decltype(row)>(row % 2 == 0) : row;
});

cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(sequence_l)::value_type>
rrColWrap2(sequence_l, sequence_l + inputRows);
Expand Down

0 comments on commit da3ab29

Please sign in to comment.