diff --git a/docs/compatibility.md b/docs/compatibility.md index 574465b4496..f9af6764498 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -495,6 +495,7 @@ The following regular expression patterns are not yet supported on the GPU and w - Character classes that use union, intersection, or subtraction semantics, such as `[a-d[m-p]]`, `[a-z&&[def]]`, or `[a-z&&[^bc]]` - Empty groups: `()` +- Empty pattern: `""` Work is ongoing to increase the range of regular expressions that can run on the GPU. diff --git a/integration_tests/src/main/python/regexp_test.py b/integration_tests/src/main/python/regexp_test.py index 2372d700d72..89929eb6762 100644 --- a/integration_tests/src/main/python/regexp_test.py +++ b/integration_tests/src/main/python/regexp_test.py @@ -790,6 +790,15 @@ def test_rlike_fallback_empty_group(): 'RLike', conf=_regexp_conf) +@allow_non_gpu('ProjectExec', 'RLike') +def test_rlike_fallback_empty_pattern(): + gen = mk_str_gen('[abcd]{1,3}') + assert_gpu_fallback_collect( + lambda spark: unary_op_df(spark, gen).selectExpr( + 'a rlike ""'), + 'RLike', + conf=_regexp_conf) + def test_rlike_escape(): gen = mk_str_gen('[ab]{0,2};?[\\-\\+]{0,2}/?') assert_gpu_and_cpu_are_equal_collect( diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala index 99d8d124a52..45d5e07dd73 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala @@ -2045,10 +2045,10 @@ object RegexRewrite { private def getPrefixRangePattern(astLs: collection.Seq[RegexAST]): Option[(String, Int, Int, Int)] = { val haveLiteralPrefix = isliteralString(astLs.dropRight(1)) - val endsWithRange = astLs.last match { - case RegexRepetition( - RegexCharacterClass(false,ListBuffer(RegexCharacterRange(a,b))), - quantifier) => { + val endsWithRange = astLs.lastOption match { + case Some(RegexRepetition( + RegexCharacterClass(false, ListBuffer(RegexCharacterRange(a,b))), + quantifier)) => { val (start, end) = (a, b) match { case (RegexChar(start), RegexChar(end)) => (start, end) case _ => return None