Skip to content

Commit

Permalink
Fix regex octal parsing to limit to 3 characters (#10233)
Browse files Browse the repository at this point in the history
Closes #10223 

Fix the regex octal-escape parsing so that it stops after at most 3 octal digits.
Update the gtest to cover an octal escape that is immediately followed by a digit character.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Mark Harris (https://github.com/harrism)

URL: #10233
  • Loading branch information
davidwendt authored Feb 8, 2022
1 parent 8af4e84 commit 1bc3727
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
8 changes: 5 additions & 3 deletions cpp/src/strings/regex/regcomp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,13 @@ class regex_parser {
if (quoted) {
// treating all quoted numbers as Octal, since we are not supporting backreferences
if (yy >= '0' && yy <= '7') {
yy = yy - '0';
char32_t c = *exprp;
while (c >= '0' && c <= '7') {
yy = yy - '0';
auto c = *exprp;
auto digits = 1;
while (c >= '0' && c <= '7' && digits < 3) {
yy = (yy << 3) | (c - '0');
c = *(++exprp);
++digits;
}
return CHAR;
} else {
Expand Down
6 changes: 3 additions & 3 deletions cpp/tests/strings/contains_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,14 @@ TEST_F(StringsContainsTests, MatchesIPV4Test)

// Checks that contains_re handles octal escapes in the pattern, including the case where a
// three-digit escape is immediately followed by another digit character.
// NOTE(review): this block is shown as a rendered diff (3 additions & 3 deletions). Where two
// near-identical statements appear back to back, the first looks like the pre-commit line and
// the second its post-commit replacement -- confirm against the repository.
TEST_F(StringsContainsTests, OctalTest)
{
// Pre-commit input: 'A' followed by the letter 'Z'.
cudf::test::strings_column_wrapper strings({"AZ", "B", "CDAZEY", ""});
// Post-commit input: 'A' followed by the digit '3', so a digit sits right after the escape.
cudf::test::strings_column_wrapper strings({"A3", "B", "CDA3EY", ""});
auto strings_view = cudf::strings_column_view(strings);
// Rows 0 and 2 are expected to match; rows 1 and 3 are not. Reused for every pattern below.
cudf::test::fixed_width_column_wrapper<bool> expected({1, 0, 1, 0});
// "\\101" is the regex escape \101: octal 101 == 65 == 'A'.
auto results = cudf::strings::contains_re(strings_view, "\\101");
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
// Pre-commit pattern: escape followed by a non-digit, which never exercised the digit limit.
results = cudf::strings::contains_re(strings_view, "\\101Z");
// Post-commit pattern: \101 followed by a literal '3'; the trailing '3' must not be folded
// into the octal value.
results = cudf::strings::contains_re(strings_view, "\\1013");
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
// Pre-commit pattern: \132 is octal for 90 == 'Z'.
results = cudf::strings::contains_re(strings_view, "D*\\101\\132");
// Post-commit pattern: \063 is octal for 51 == '3', i.e. two adjacent octal escapes.
results = cudf::strings::contains_re(strings_view, "D*\\101\\063");
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
}

Expand Down

0 comments on commit 1bc3727

Please sign in to comment.