From d7f97ada22e1296d879e1de49b97ec9e50d32490 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 7 Jan 2022 09:29:45 -0500 Subject: [PATCH 1/2] Fix octal pattern matching in regex string --- cpp/src/strings/regex/regcomp.cpp | 8 ++++---- cpp/tests/strings/contains_tests.cpp | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 7da4915d668..9471380fe0b 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -258,10 +258,10 @@ class regex_parser { // treating all quoted numbers as Octal, since we are not supporting backreferences if (yy >= '0' && yy <= '7') { yy = yy - '0'; - char32_t c = *exprp++; + char32_t c = *exprp; while (c >= '0' && c <= '7') { yy = (yy << 3) | (c - '0'); - c = *exprp++; + c = *++exprp; } return CHAR; } else { @@ -926,7 +926,7 @@ void reprog::optimize2() _startinst_ids.push_back(-1); // terminator mark } -#ifndef NDBUG +#ifndef NDEBUG void reprog::print(regex_flags const flags) { printf("Flags = 0x%08x\n", static_cast(flags)); diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index f95b282171f..48c4aac9e8a 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -237,6 +237,19 @@ TEST_F(StringsContainsTests, MatchesIPV4Test) } } +TEST_F(StringsContainsTests, OctalTest) +{ + cudf::test::strings_column_wrapper strings({"AZ", "B", "CDAZEY", ""}); + auto strings_view = cudf::strings_column_view(strings); + cudf::test::fixed_width_column_wrapper<bool> expected({1, 0, 1, 0}); + auto results = cudf::strings::contains_re(strings_view, "\\101"); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + results = cudf::strings::contains_re(strings_view, "\\101Z"); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + results = cudf::strings::contains_re(strings_view, "D*\\101\\132"); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); +} + TEST_F(StringsContainsTests, EmbeddedNullCharacter) { std::vector data(10); From 377ab9da64bace39545281711a8d5a7451bdf141 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 12 Jan 2022 09:26:54 -0500 Subject: [PATCH 2/2] change *++exprp to *(++exprp) --- cpp/src/strings/regex/regcomp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 9471380fe0b..8fbd82b8dc7 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -261,7 +261,7 @@ class regex_parser { char32_t c = *exprp; while (c >= '0' && c <= '7') { yy = (yy << 3) | (c - '0'); - c = *++exprp; + c = *(++exprp); } return CHAR; } else {