Skip to content

Commit

Permalink
Fix regex octal parsing to limit to 3 characters
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Feb 7, 2022
1 parent 2e458b9 commit 13e7093
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
8 changes: 5 additions & 3 deletions cpp/src/strings/regex/regcomp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,13 @@ class regex_parser {
if (quoted) {
// treating all quoted numbers as Octal, since we are not supporting backreferences
if (yy >= '0' && yy <= '7') {
yy = yy - '0';
char32_t c = *exprp;
while (c >= '0' && c <= '7') {
yy = yy - '0';
auto c = *exprp;
auto digits = 1;
while (c >= '0' && c <= '7' && digits < 3) {
yy = (yy << 3) | (c - '0');
c = *(++exprp);
++digits;
}
return CHAR;
} else {
Expand Down
6 changes: 3 additions & 3 deletions cpp/tests/strings/contains_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,14 @@ TEST_F(StringsContainsTests, MatchesIPV4Test)

// Checks that backslash-escaped octal sequences in a regex pattern match the
// character they encode when searched for with contains_re.
//
// NOTE(review): this text comes from a commit view with the diff markers
// stripped, so each changed statement appears twice: the earlier version
// immediately followed by its replacement. Read the second line of each such
// pair as the current code -- TODO confirm against the repository.
TEST_F(StringsContainsTests, OctalTest)
{
// Earlier version of the input column (presumably the removed line).
cudf::test::strings_column_wrapper strings({"AZ", "B", "CDAZEY", ""});
// Replacement input: rows 0 and 2 contain "A3", row 1 has no 'A', row 3 is empty.
cudf::test::strings_column_wrapper strings({"A3", "B", "CDA3EY", ""});
auto strings_view = cudf::strings_column_view(strings);
// Only the rows containing 'A' (rows 0 and 2) are expected to match.
cudf::test::fixed_width_column_wrapper<bool> expected({1, 0, 1, 0});
// Octal 101 is decimal 65, i.e. 'A'.
auto results = cudf::strings::contains_re(strings_view, "\\101");
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
// Earlier version: octal escape followed by the non-octal character 'Z'.
results = cudf::strings::contains_re(strings_view, "\\101Z");
// Replacement: the escape is followed by the octal digit '3'. The same expected
// column is reused, so the pattern must be read as 'A' then a literal '3',
// i.e. the escape stops after three octal digits.
results = cudf::strings::contains_re(strings_view, "\\1013");
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
// Earlier version: octal 132 is decimal 90, i.e. 'Z'.
results = cudf::strings::contains_re(strings_view, "D*\\101\\132");
// Replacement: octal 063 is decimal 51, i.e. '3', so this is zero or more 'D'
// followed by "A3" written entirely with octal escapes.
results = cudf::strings::contains_re(strings_view, "D*\\101\\063");
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
}

Expand Down

0 comments on commit 13e7093

Please sign in to comment.