Skip to content

Commit

Permalink
Support some escape characters in search list when rewriting regexp_replace to string replace
Browse files Browse the repository at this point in the history

Signed-off-by: Haoyang Li <[email protected]>
  • Loading branch information
thirtiseven committed Dec 3, 2024
1 parent 017fdef commit b48335a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 6 deletions.
7 changes: 5 additions & 2 deletions integration_tests/src/main/python/regexp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,9 @@ def test_regexp_replace_simple(regexp_enabled):
'REGEXP_REPLACE(a, "ab", "PROD")',
'REGEXP_REPLACE(a, "ae", "PROD")',
'REGEXP_REPLACE(a, "bc", "PROD")',
'REGEXP_REPLACE(a, "fa", "PROD")'
'REGEXP_REPLACE(a, "fa", "PROD")',
'REGEXP_REPLACE(a, "a\n", "PROD")',
'REGEXP_REPLACE(a, "\n", "PROD")'
),
conf=conf
)
Expand All @@ -1032,7 +1034,8 @@ def test_regexp_replace_multi_optimization(regexp_enabled):
'REGEXP_REPLACE(a, "aa|bb|cc|dd", "PROD")',
'REGEXP_REPLACE(a, "(aa|bb)|(cc|dd)", "PROD")',
'REGEXP_REPLACE(a, "aa|bb|cc|dd|ee", "PROD")',
'REGEXP_REPLACE(a, "aa|bb|cc|dd|ee|ff", "PROD")'
'REGEXP_REPLACE(a, "aa|bb|cc|dd|ee|ff", "PROD")',
'REGEXP_REPLACE(a, "a\n|b\a|c\t", "PROD")'
),
conf=conf
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -593,8 +593,9 @@ object GpuOverrides extends Logging {
}

/**
 * Decides whether a literal search string can be handled as a plain string replace.
 *
 * Returns `true` when `strLit` contains none of the entries of `regexList`, after the
 * exempted entries have been removed from that list.
 *
 * NOTE(review): `regexList` is defined outside this view; presumably it enumerates the
 * regex special characters and escape sequences -- confirm against its definition.
 *
 * @param strLit the literal search pattern to inspect
 * @return `true` if no non-exempt `regexList` entry occurs as a substring of `strLit`
 */
def isSupportedStringReplacePattern(strLit: String): Boolean = {
// check for regex special characters, except for \u0000 which we can support
// NOTE(review): this is the pre-change form of the check, shown next to its replacement
// by the diff view. Read as a statement here, its result is not used; the value of the
// method is the last expression below.
!regexList.filterNot(_ == "\u0000").exists(pattern => strLit.contains(pattern))
// check for regex special characters, except for \u0000, \n, \r, and \t which we can support
// The exemption list holds the actual control characters (NUL, newline, carriage return,
// tab), so only `regexList` entries equal to one of these single characters are skipped.
val supported = Seq("\u0000", "\n", "\r", "\t")
// Drop the exempt entries, then reject the literal if any remaining entry is a substring.
!regexList.filterNot(supported.contains(_)).exists(pattern => strLit.contains(pattern))
}

def isSupportedStringReplacePattern(exp: Expression): Boolean = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -207,7 +207,8 @@ class RegExpUtilsSuite extends AnyFunSuite {
"aa|bb|cc|dd" -> Seq("aa", "bb", "cc", "dd"),
"(aa|bb)|(cc|dd)" -> Seq("aa", "bb", "cc", "dd"),
"aa|bb|cc|dd|ee" -> Seq("aa", "bb", "cc", "dd", "ee"),
"aa|bb|cc|dd|ee|ff" -> Seq("aa", "bb", "cc", "dd", "ee", "ff")
"aa|bb|cc|dd|ee|ff" -> Seq("aa", "bb", "cc", "dd", "ee", "ff"),
"a\n|b\t|c\r" -> Seq("a\n", "b\t", "c\r")
)

regexChoices.foreach { case (pattern, choices) =>
Expand Down

0 comments on commit b48335a

Please sign in to comment.