From 13e70937beade5eda1c956d7dd786b7c049a8936 Mon Sep 17 00:00:00 2001
From: David Wendt
Date: Mon, 7 Feb 2022 11:50:38 -0500
Subject: [PATCH] Fix regex octal parsing to limit to 3 characters

---
 cpp/src/strings/regex/regcomp.cpp    | 8 +++++---
 cpp/tests/strings/contains_tests.cpp | 6 +++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp
index 244cec1d780..065c358d08b 100644
--- a/cpp/src/strings/regex/regcomp.cpp
+++ b/cpp/src/strings/regex/regcomp.cpp
@@ -257,11 +257,13 @@ class regex_parser {
     if (quoted) {
       // treating all quoted numbers as Octal, since we are not supporting backreferences
       if (yy >= '0' && yy <= '7') {
-        yy         = yy - '0';
-        char32_t c = *exprp;
-        while (c >= '0' && c <= '7') {
+        yy          = yy - '0';
+        auto c      = *exprp;
+        auto digits = 1;
+        while (c >= '0' && c <= '7' && digits < 3) {
           yy = (yy << 3) | (c - '0');
           c  = *(++exprp);
+          ++digits;
         }
         return CHAR;
       } else {
diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp
index 12a00aa35ab..a72ec61dd8f 100644
--- a/cpp/tests/strings/contains_tests.cpp
+++ b/cpp/tests/strings/contains_tests.cpp
@@ -239,14 +239,14 @@ TEST_F(StringsContainsTests, MatchesIPV4Test)
 
 TEST_F(StringsContainsTests, OctalTest)
 {
-  cudf::test::strings_column_wrapper strings({"AZ", "B", "CDAZEY", ""});
+  cudf::test::strings_column_wrapper strings({"A3", "B", "CDA3EY", ""});
   auto strings_view = cudf::strings_column_view(strings);
   cudf::test::fixed_width_column_wrapper expected({1, 0, 1, 0});
   auto results = cudf::strings::contains_re(strings_view, "\\101");
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-  results = cudf::strings::contains_re(strings_view, "\\101Z");
+  results = cudf::strings::contains_re(strings_view, "\\1013");
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-  results = cudf::strings::contains_re(strings_view, "D*\\101\\132");
+  results = cudf::strings::contains_re(strings_view, "D*\\101\\063");
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 