Skip to content

Commit

Permalink
Add except declaration in Cython interface for regex_program::create (#…
Browse files Browse the repository at this point in the history
…13054)

Add the `except +` declaration to the `cudf::strings::regex_program::create()` function in the Cython `regex_program.pxd` interface, since this call throws a C++ exception when given an invalid regex pattern. With the declaration in place, Cython's normal exception handling translates the C++ exception into a Python exception instead of letting it abort the process.

Closes #13052

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Ashwin Srinath (https://github.com/shwina)

URL: #13054
  • Loading branch information
davidwendt authored Apr 5, 2023
1 parent 9a770f6 commit 7a739ce
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 29 deletions.
7 changes: 5 additions & 2 deletions python/cudf/cudf/_lib/cpp/strings/regex_program.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
Expand All @@ -12,4 +12,7 @@ cdef extern from "cudf/strings/regex/regex_program.hpp" \
cdef cppclass regex_program:

@staticmethod
unique_ptr[regex_program] create(string pattern, regex_flags flags)
unique_ptr[regex_program] create(
string pattern,
regex_flags flags
) except +
14 changes: 7 additions & 7 deletions python/cudf/cudf/_lib/strings/contains.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libc.stdint cimport uint32_t
Expand Down Expand Up @@ -35,10 +35,10 @@ def contains_re(Column source_strings, object reg_ex, uint32_t flags):

cdef string reg_ex_string = <string>str(reg_ex).encode()
cdef regex_flags c_flags = <regex_flags>flags
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(reg_ex_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(reg_ex_string, c_flags))
c_result = move(cpp_contains_re(
source_view,
dereference(c_prog)
Expand All @@ -58,10 +58,10 @@ def count_re(Column source_strings, object reg_ex, uint32_t flags):

cdef string reg_ex_string = <string>str(reg_ex).encode()
cdef regex_flags c_flags = <regex_flags>flags
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(reg_ex_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(reg_ex_string, c_flags))
c_result = move(cpp_count_re(
source_view,
dereference(c_prog)
Expand All @@ -81,10 +81,10 @@ def match_re(Column source_strings, object reg_ex, uint32_t flags):

cdef string reg_ex_string = <string>str(reg_ex).encode()
cdef regex_flags c_flags = <regex_flags>flags
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(reg_ex_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(reg_ex_string, c_flags))
c_result = move(cpp_matches_re(
source_view,
dereference(c_prog)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/strings/extract.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libc.stdint cimport uint32_t
Expand Down Expand Up @@ -30,10 +30,10 @@ def extract(Column source_strings, object pattern, uint32_t flags):

cdef string pattern_string = <string>str(pattern).encode()
cdef regex_flags c_flags = <regex_flags>flags
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_extract(
source_view,
dereference(c_prog)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/strings/findall.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libc.stdint cimport uint32_t
Expand Down Expand Up @@ -27,10 +27,10 @@ def findall(Column source_strings, object pattern, uint32_t flags):

cdef string pattern_string = <string>str(pattern).encode()
cdef regex_flags c_flags = <regex_flags>flags
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_findall(
source_view,
dereference(c_prog)
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/strings/replace_re.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libcpp.memory cimport unique_ptr
Expand Down Expand Up @@ -43,10 +43,10 @@ def replace_re(Column source_strings,
cdef const string_scalar* scalar_repl = \
<const string_scalar*>(repl.get_raw_ptr())
cdef regex_flags c_flags = regex_flags.DEFAULT
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_replace_re(
source_view,
dereference(c_prog),
Expand All @@ -73,10 +73,10 @@ def replace_with_backrefs(
cdef string pattern_string = <string>str(pattern).encode()
cdef string repl_string = <string>str(repl).encode()
cdef regex_flags c_flags = regex_flags.DEFAULT
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_replace_with_backrefs(
source_view,
dereference(c_prog),
Expand Down
18 changes: 9 additions & 9 deletions python/cudf/cudf/_lib/strings/split/split.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libcpp.memory cimport unique_ptr
Expand Down Expand Up @@ -163,10 +163,10 @@ def split_re(Column source_strings,
cdef column_view source_view = source_strings.view()
cdef string pattern_string = <string>str(pattern).encode()
cdef regex_flags c_flags = regex_flags.DEFAULT
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_split_re(
source_view,
dereference(c_prog),
Expand All @@ -192,10 +192,10 @@ def rsplit_re(Column source_strings,
cdef column_view source_view = source_strings.view()
cdef string pattern_string = <string>str(pattern).encode()
cdef regex_flags c_flags = regex_flags.DEFAULT
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_rsplit_re(
source_view,
dereference(c_prog),
Expand All @@ -220,10 +220,10 @@ def split_record_re(Column source_strings,
cdef column_view source_view = source_strings.view()
cdef string pattern_string = <string>str(pattern).encode()
cdef regex_flags c_flags = regex_flags.DEFAULT
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_split_record_re(
source_view,
dereference(c_prog),
Expand All @@ -248,10 +248,10 @@ def rsplit_record_re(Column source_strings,
cdef column_view source_view = source_strings.view()
cdef string pattern_string = <string>str(pattern).encode()
cdef regex_flags c_flags = regex_flags.DEFAULT
cdef unique_ptr[regex_program] c_prog = \
regex_program.create(pattern_string, c_flags)
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_rsplit_record_re(
source_view,
dereference(c_prog),
Expand Down
6 changes: 6 additions & 0 deletions python/cudf/cudf/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,12 @@ def test_string_extract(ps_gs, pat, expand, flags, flags_raise):
assert_eq(expect, got)


def test_string_invalid_regex():
    """An invalid regex passed to ``str.extract`` must raise ``RuntimeError``."""
    invalid_pattern = r"{\}"
    strings = cudf.Series(["a"])
    # The error must surface as a catchable Python exception.
    with pytest.raises(RuntimeError):
        strings.str.extract(invalid_pattern)


@pytest.mark.parametrize(
"pat,regex",
[
Expand Down

0 comments on commit 7a739ce

Please sign in to comment.