Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change pattern parameter for regex APIs from std::string to std::string_view #10810

Merged
merged 8 commits on May 13, 2022
8 changes: 4 additions & 4 deletions cpp/include/cudf/strings/contains.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -51,7 +51,7 @@ namespace strings {
*/
std::unique_ptr<column> contains_re(
strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand All @@ -78,7 +78,7 @@ std::unique_ptr<column> contains_re(
*/
std::unique_ptr<column> matches_re(
strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand All @@ -105,7 +105,7 @@ std::unique_ptr<column> matches_re(
*/
std::unique_ptr<column> count_re(
strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/extract.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace strings {
*/
std::unique_ptr<table> extract(
strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down Expand Up @@ -90,7 +90,7 @@ std::unique_ptr<table> extract(
*/
std::unique_ptr<column> extract_all_record(
strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/findall.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ namespace strings {
*/
std::unique_ptr<table> findall(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down Expand Up @@ -90,7 +90,7 @@ std::unique_ptr<table> findall(
*/
std::unique_ptr<column> findall_record(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/strings/replace_re.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ namespace strings {
*/
std::unique_ptr<column> replace_re(
strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
string_scalar const& replacement = string_scalar(""),
std::optional<size_type> max_replace_count = std::nullopt,
regex_flags const flags = regex_flags::DEFAULT,
Expand Down Expand Up @@ -98,8 +98,8 @@ std::unique_ptr<column> replace_re(
*/
std::unique_ptr<column> replace_with_backrefs(
strings_column_view const& strings,
std::string const& pattern,
std::string const& replacement,
std::string_view pattern,
std::string_view replacement,
regex_flags const flags = regex_flags::DEFAULT,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
8 changes: 4 additions & 4 deletions cpp/include/cudf/strings/split/split_re.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ namespace strings {
*/
std::unique_ptr<table> split_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
size_type maxsplit = -1,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down Expand Up @@ -121,7 +121,7 @@ std::unique_ptr<table> split_re(
*/
std::unique_ptr<table> rsplit_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
size_type maxsplit = -1,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down Expand Up @@ -173,7 +173,7 @@ std::unique_ptr<table> rsplit_re(
*/
std::unique_ptr<column> split_record_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
size_type maxsplit = -1,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down Expand Up @@ -227,7 +227,7 @@ std::unique_ptr<column> split_record_re(
*/
std::unique_ptr<column> rsplit_record_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
size_type maxsplit = -1,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
14 changes: 7 additions & 7 deletions cpp/src/strings/contains.cu
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct contains_fn {
};

std::unique_ptr<column> contains_impl(strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
bool const beginning_only,
rmm::cuda_stream_view stream,
Expand Down Expand Up @@ -85,7 +85,7 @@ std::unique_ptr<column> contains_impl(strings_column_view const& input,

std::unique_ptr<column> contains_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
Expand All @@ -95,7 +95,7 @@ std::unique_ptr<column> contains_re(

std::unique_ptr<column> matches_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
Expand All @@ -105,7 +105,7 @@ std::unique_ptr<column> matches_re(

std::unique_ptr<column> count_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
Expand All @@ -128,7 +128,7 @@ std::unique_ptr<column> count_re(
// external APIs

std::unique_ptr<column> contains_re(strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand All @@ -137,7 +137,7 @@ std::unique_ptr<column> contains_re(strings_column_view const& strings,
}

std::unique_ptr<column> matches_re(strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand All @@ -146,7 +146,7 @@ std::unique_ptr<column> matches_re(strings_column_view const& strings,
}

std::unique_ptr<column> count_re(strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/extract/extract.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct extract_fn {

//
std::unique_ptr<table> extract(strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -130,7 +130,7 @@ std::unique_ptr<table> extract(strings_column_view const& input,
// external API

std::unique_ptr<table> extract(strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/extract/extract_all.cu
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ struct extract_fn {
*/
std::unique_ptr<column> extract_all_record(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
Expand Down Expand Up @@ -165,7 +165,7 @@ std::unique_ptr<column> extract_all_record(
// external API

std::unique_ptr<column> extract_all_record(strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/regex/regex.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class reprog_device {
* @return The program device object.
*/
static std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> create(
std::string const& pattern, rmm::cuda_stream_view stream);
std::string_view pattern, rmm::cuda_stream_view stream);

/**
* @brief Create the device program instance from a regex pattern.
Expand All @@ -99,7 +99,7 @@ class reprog_device {
* @return The program device object.
*/
static std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> create(
std::string const& pattern, regex_flags const re_flags, rmm::cuda_stream_view stream);
std::string_view pattern, regex_flags const re_flags, rmm::cuda_stream_view stream);

/**
* @brief Called automatically by the unique_ptr returned from create().
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/regex/regexec.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ reprog_device::reprog_device(reprog& prog)
}

std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> reprog_device::create(
std::string const& pattern, rmm::cuda_stream_view stream)
std::string_view pattern, rmm::cuda_stream_view stream)
{
return reprog_device::create(pattern, regex_flags::MULTILINE, stream);
}

// Create instance of the reprog that can be passed into a device kernel
std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> reprog_device::create(
std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream)
std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream)
{
// compile pattern into host object
reprog h_prog = reprog::create_from(pattern, flags);
Expand Down
19 changes: 10 additions & 9 deletions cpp/src/strings/replace/backref_re.cu
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,14 @@ namespace {
*
* Reference: https://www.regular-expressions.info/refreplacebackref.html
*/
- std::string get_backref_pattern(std::string const& repl)
+ std::string get_backref_pattern(std::string_view repl)
  {
  std::string const backslash_pattern = "\\\\(\\d+)";
  std::string const bracket_pattern = "\\$\\{(\\d+)\\}";
  std::smatch m;
- return std::regex_search(repl, m, std::regex(backslash_pattern)) ? backslash_pattern
- : bracket_pattern;
+ std::string r{repl};
[Review thread on the line above: davidwendt marked this conversation as resolved.]
+ return std::regex_search(r, m, std::regex(backslash_pattern)) ? backslash_pattern
+ : bracket_pattern;
  }
/**
* @brief Parse the back-ref index and position values from a given replace format.
Expand All @@ -66,11 +67,11 @@ std::string get_backref_pattern(std::string const& repl)
* For example, for input string 'hello \2 and \1' the returned `backref_type` vector
* contains `[(2,6),(1,11)]` and the returned string is 'hello and '.
*/
std::pair<std::string, std::vector<backref_type>> parse_backrefs(std::string const& repl,
std::pair<std::string, std::vector<backref_type>> parse_backrefs(std::string_view repl,
int const group_count)
{
std::vector<backref_type> backrefs;
std::string str = repl; // make a modifiable copy
std::string str{repl}; // make a modifiable copy
std::smatch m;
std::regex ex(get_backref_pattern(repl));
std::string rtn;
Expand Down Expand Up @@ -100,8 +101,8 @@ std::pair<std::string, std::vector<backref_type>> parse_backrefs(std::string con

//
std::unique_ptr<column> replace_with_backrefs(strings_column_view const& input,
std::string const& pattern,
std::string const& replacement,
std::string_view pattern,
std::string_view replacement,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -144,8 +145,8 @@ std::unique_ptr<column> replace_with_backrefs(strings_column_view const& input,
// external API

std::unique_ptr<column> replace_with_backrefs(strings_column_view const& strings,
std::string const& pattern,
std::string const& replacement,
std::string_view pattern,
std::string_view replacement,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/replace/replace_re.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ struct replace_regex_fn {
//
std::unique_ptr<column> replace_re(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
string_scalar const& replacement,
std::optional<size_type> max_replace_count,
regex_flags const flags,
Expand Down Expand Up @@ -135,7 +135,7 @@ std::unique_ptr<column> replace_re(
// external API

std::unique_ptr<column> replace_re(strings_column_view const& strings,
std::string const& pattern,
std::string_view pattern,
string_scalar const& replacement,
std::optional<size_type> max_replace_count,
regex_flags const flags,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/search/findall.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ struct findall_fn {
} // namespace

std::unique_ptr<table> findall(strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -147,7 +147,7 @@ std::unique_ptr<table> findall(strings_column_view const& input,
// external API

std::unique_ptr<table> findall(strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/strings/search/findall_record.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ std::unique_ptr<column> findall_util(column_device_view const& d_strings,
//
std::unique_ptr<column> findall_record(
strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
Expand Down Expand Up @@ -133,7 +133,7 @@ std::unique_ptr<column> findall_record(
// external API

std::unique_ptr<column> findall_record(strings_column_view const& input,
std::string const& pattern,
std::string_view pattern,
regex_flags const flags,
rmm::mr::device_memory_resource* mr)
{
Expand Down
Loading