From e3de42319827ce0f41d47390b6385902454e9274 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 6 May 2022 16:46:35 -0400 Subject: [PATCH 1/3] Change pattern parameter for regex APIs from std::string to std::string_view --- cpp/include/cudf/strings/contains.hpp | 8 ++++---- cpp/include/cudf/strings/extract.hpp | 4 ++-- cpp/include/cudf/strings/findall.hpp | 4 ++-- cpp/include/cudf/strings/replace_re.hpp | 4 ++-- cpp/include/cudf/strings/split/split_re.hpp | 8 ++++---- cpp/src/strings/contains.cu | 14 +++++++------- cpp/src/strings/extract/extract.cu | 4 ++-- cpp/src/strings/extract/extract_all.cu | 4 ++-- cpp/src/strings/regex/regex.cuh | 4 ++-- cpp/src/strings/regex/regexec.cu | 4 ++-- cpp/src/strings/replace/backref_re.cu | 4 ++-- cpp/src/strings/replace/replace_re.cu | 4 ++-- cpp/src/strings/search/findall.cu | 4 ++-- cpp/src/strings/search/findall_record.cu | 4 ++-- cpp/src/strings/split/split_re.cu | 20 ++++++++++---------- 15 files changed, 47 insertions(+), 47 deletions(-) diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp index 9f408a40314..5b8b2f56bae 100644 --- a/cpp/include/cudf/strings/contains.hpp +++ b/cpp/include/cudf/strings/contains.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,7 +51,7 @@ namespace strings { */ std::unique_ptr contains_re( strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -78,7 +78,7 @@ std::unique_ptr contains_re( */ std::unique_ptr matches_re( strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -105,7 +105,7 @@ std::unique_ptr matches_re( */ std::unique_ptr count_re( strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp index 94e9f36d7d3..680d0f5b7bc 100644 --- a/cpp/include/cudf/strings/extract.hpp +++ b/cpp/include/cudf/strings/extract.hpp @@ -55,7 +55,7 @@ namespace strings { */ std::unique_ptr extract( strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -90,7 +90,7 @@ std::unique_ptr
extract( */ std::unique_ptr extract_all_record( strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index 25ebdc61673..25c6d523250 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -56,7 +56,7 @@ namespace strings { */ std::unique_ptr
findall( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -90,7 +90,7 @@ std::unique_ptr
findall( */ std::unique_ptr findall_record( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index 0ab3953470d..7946bd67391 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -50,7 +50,7 @@ namespace strings { */ std::unique_ptr replace_re( strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, string_scalar const& replacement = string_scalar(""), std::optional max_replace_count = std::nullopt, regex_flags const flags = regex_flags::DEFAULT, @@ -98,7 +98,7 @@ std::unique_ptr replace_re( */ std::unique_ptr replace_with_backrefs( strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, std::string const& replacement, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/strings/split/split_re.hpp b/cpp/include/cudf/strings/split/split_re.hpp index 9f40956722d..57246bd91d2 100644 --- a/cpp/include/cudf/strings/split/split_re.hpp +++ b/cpp/include/cudf/strings/split/split_re.hpp @@ -71,7 +71,7 @@ namespace strings { */ std::unique_ptr
split_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -121,7 +121,7 @@ std::unique_ptr
split_re( */ std::unique_ptr
rsplit_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -173,7 +173,7 @@ std::unique_ptr
rsplit_re( */ std::unique_ptr split_record_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -227,7 +227,7 @@ std::unique_ptr split_record_re( */ std::unique_ptr rsplit_record_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit = -1, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index 987cd076fd0..d75d914bb8e 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -56,7 +56,7 @@ struct contains_fn { }; std::unique_ptr contains_impl(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, bool const beginning_only, rmm::cuda_stream_view stream, @@ -85,7 +85,7 @@ std::unique_ptr contains_impl(strings_column_view const& input, std::unique_ptr contains_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) @@ -95,7 +95,7 @@ std::unique_ptr contains_re( std::unique_ptr matches_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) @@ -105,7 +105,7 @@ std::unique_ptr matches_re( std::unique_ptr count_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) @@ -128,7 +128,7 @@ std::unique_ptr count_re( // external APIs std::unique_ptr contains_re(strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::mr::device_memory_resource* mr) { @@ -137,7 +137,7 @@ std::unique_ptr contains_re(strings_column_view const& strings, } std::unique_ptr matches_re(strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::mr::device_memory_resource* mr) { @@ -146,7 +146,7 @@ std::unique_ptr matches_re(strings_column_view const& strings, } std::unique_ptr count_re(strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu index 59b90952d97..018fb7ba2fb 100644 --- a/cpp/src/strings/extract/extract.cu +++ b/cpp/src/strings/extract/extract.cu @@ -85,7 +85,7 @@ struct extract_fn { // std::unique_ptr
extract(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -130,7 +130,7 @@ std::unique_ptr
extract(strings_column_view const& input, // external API std::unique_ptr
extract(strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index 95b8a43a9d4..60c28027833 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -96,7 +96,7 @@ struct extract_fn { */ std::unique_ptr extract_all_record( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) @@ -165,7 +165,7 @@ std::unique_ptr extract_all_record( // external API std::unique_ptr extract_all_record(strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index 5ccc70222d5..2ee195a2c5e 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -88,7 +88,7 @@ class reprog_device { * @return The program device object. */ static std::unique_ptr> create( - std::string const& pattern, rmm::cuda_stream_view stream); + std::string_view pattern, rmm::cuda_stream_view stream); /** * @brief Create the device program instance from a regex pattern. @@ -99,7 +99,7 @@ class reprog_device { * @return The program device object. */ static std::unique_ptr> create( - std::string const& pattern, regex_flags const re_flags, rmm::cuda_stream_view stream); + std::string_view pattern, regex_flags const re_flags, rmm::cuda_stream_view stream); /** * @brief Called automatically by the unique_ptr returned from create(). diff --git a/cpp/src/strings/regex/regexec.cu b/cpp/src/strings/regex/regexec.cu index 4b58d9d8a88..16f5b6fa03d 100644 --- a/cpp/src/strings/regex/regexec.cu +++ b/cpp/src/strings/regex/regexec.cu @@ -43,14 +43,14 @@ reprog_device::reprog_device(reprog& prog) } std::unique_ptr> reprog_device::create( - std::string const& pattern, rmm::cuda_stream_view stream) + std::string_view pattern, rmm::cuda_stream_view stream) { return reprog_device::create(pattern, regex_flags::MULTILINE, stream); } // Create instance of the reprog that can be passed into a device kernel std::unique_ptr> reprog_device::create( - std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream) + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream) { // compile pattern into host object reprog h_prog = reprog::create_from(pattern, flags); diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index 107adf07263..3befba439a5 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -100,7 +100,7 @@ std::pair> parse_backrefs(std::string con // std::unique_ptr replace_with_backrefs(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, std::string const& replacement, regex_flags const flags, rmm::cuda_stream_view stream, @@ -144,7 +144,7 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& input, // external API std::unique_ptr replace_with_backrefs(strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, std::string const& replacement, regex_flags const flags, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index 159f83453bd..1ed29587ac7 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -101,7 +101,7 @@ struct replace_regex_fn { // std::unique_ptr replace_re( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, string_scalar const& replacement, std::optional max_replace_count, regex_flags const flags, @@ -135,7 +135,7 @@ std::unique_ptr replace_re( // external API std::unique_ptr replace_re(strings_column_view const& strings, - std::string const& pattern, + std::string_view pattern, string_scalar const& replacement, std::optional max_replace_count, regex_flags const flags, diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 64e46d07e25..c92e1e7bbd9 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -86,7 +86,7 @@ struct findall_fn { } // namespace std::unique_ptr
findall(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -147,7 +147,7 @@ std::unique_ptr
findall(strings_column_view const& input, // external API std::unique_ptr
findall(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/strings/search/findall_record.cu b/cpp/src/strings/search/findall_record.cu index 2f4b9ce5b24..e4cf4dad618 100644 --- a/cpp/src/strings/search/findall_record.cu +++ b/cpp/src/strings/search/findall_record.cu @@ -93,7 +93,7 @@ std::unique_ptr findall_util(column_device_view const& d_strings, // std::unique_ptr findall_record( strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) @@ -133,7 +133,7 @@ std::unique_ptr findall_record( // external API std::unique_ptr findall_record(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, regex_flags const flags, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index 16edd0606e9..750f5fbe942 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -184,7 +184,7 @@ struct tokens_transform_fn { }; std::unique_ptr
split_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, split_direction direction, size_type maxsplit, rmm::cuda_stream_view stream, @@ -252,7 +252,7 @@ std::unique_ptr
split_re(strings_column_view const& input, } std::unique_ptr split_record_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, split_direction direction, size_type maxsplit, rmm::cuda_stream_view stream, @@ -289,7 +289,7 @@ std::unique_ptr split_record_re(strings_column_view const& input, } // namespace std::unique_ptr
split_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -298,7 +298,7 @@ std::unique_ptr
split_re(strings_column_view const& input, } std::unique_ptr split_record_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -307,7 +307,7 @@ std::unique_ptr split_record_re(strings_column_view const& input, } std::unique_ptr
rsplit_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -316,7 +316,7 @@ std::unique_ptr
rsplit_re(strings_column_view const& input, } std::unique_ptr rsplit_record_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -329,7 +329,7 @@ std::unique_ptr rsplit_record_re(strings_column_view const& input, // external APIs std::unique_ptr
split_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::mr::device_memory_resource* mr) { @@ -338,7 +338,7 @@ std::unique_ptr
split_re(strings_column_view const& input, } std::unique_ptr split_record_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::mr::device_memory_resource* mr) { @@ -347,7 +347,7 @@ std::unique_ptr split_record_re(strings_column_view const& input, } std::unique_ptr
rsplit_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::mr::device_memory_resource* mr) { @@ -356,7 +356,7 @@ std::unique_ptr
rsplit_re(strings_column_view const& input, } std::unique_ptr rsplit_record_re(strings_column_view const& input, - std::string const& pattern, + std::string_view pattern, size_type maxsplit, rmm::mr::device_memory_resource* mr) { From 7d344cc82668fffe0705f010d83184269e44c466 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 11 May 2022 12:51:52 -0400 Subject: [PATCH 2/3] change backref replacement parm to string_view --- cpp/include/cudf/strings/replace_re.hpp | 2 +- cpp/src/strings/replace/backref_re.cu | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index 7946bd67391..36c287009d0 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -99,7 +99,7 @@ std::unique_ptr replace_re( std::unique_ptr replace_with_backrefs( strings_column_view const& strings, std::string_view pattern, - std::string const& replacement, + std::string_view replacement, regex_flags const flags = regex_flags::DEFAULT, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index 3befba439a5..49ea1bcc3fc 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -46,13 +46,14 @@ namespace { * * Reference: https://www.regular-expressions.info/refreplacebackref.html */ -std::string get_backref_pattern(std::string const& repl) +std::string get_backref_pattern(std::string_view repl) { std::string const backslash_pattern = "\\\\(\\d+)"; std::string const bracket_pattern = "\\$\\{(\\d+)\\}"; std::smatch m; - return std::regex_search(repl, m, std::regex(backslash_pattern)) ? backslash_pattern - : bracket_pattern; + std::string r{repl}; + return std::regex_search(r, m, std::regex(backslash_pattern)) ? backslash_pattern + : bracket_pattern; } /** * @brief Parse the back-ref index and position values from a given replace format. @@ -66,11 +67,11 @@ std::string get_backref_pattern(std::string const& repl) * For example, for input string 'hello \2 and \1' the returned `backref_type` vector * contains `[(2,6),(1,11)]` and the returned string is 'hello and '. */ -std::pair> parse_backrefs(std::string const& repl, +std::pair> parse_backrefs(std::string_view repl, int const group_count) { std::vector backrefs; - std::string str = repl; // make a modifiable copy + std::string str{repl}; // make a modifiable copy std::smatch m; std::regex ex(get_backref_pattern(repl)); std::string rtn; @@ -101,7 +102,7 @@ std::pair> parse_backrefs(std::string con // std::unique_ptr replace_with_backrefs(strings_column_view const& input, std::string_view pattern, - std::string const& replacement, + std::string_view replacement, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -145,7 +146,7 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& input, std::unique_ptr replace_with_backrefs(strings_column_view const& strings, std::string_view pattern, - std::string const& replacement, + std::string_view replacement, regex_flags const flags, rmm::mr::device_memory_resource* mr) { From 8f5cde795ef1c40179753614c59f78ebec75ac30 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 12 May 2022 11:02:51 -0400 Subject: [PATCH 3/3] move variable r declaration --- cpp/src/strings/replace/backref_re.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index 49ea1bcc3fc..55498e760ff 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -50,8 +50,8 @@ std::string get_backref_pattern(std::string_view repl) { std::string const backslash_pattern = "\\\\(\\d+)"; std::string const bracket_pattern = "\\$\\{(\\d+)\\}"; + std::string const r{repl}; std::smatch m; - std::string r{repl}; return std::regex_search(r, m, std::regex(backslash_pattern)) ? backslash_pattern : bracket_pattern; }