From dff1848399a9a4ffb615bc660c8329cca0a69131 Mon Sep 17 00:00:00 2001
From: Chris Jarrett
Date: Thu, 27 Oct 2022 08:42:47 -0700
Subject: [PATCH 1/2] Add replace operator

---
 dask_sql/physical/rex/core/call.py | 16 ++++++++++++++++
 tests/integration/test_rex.py      |  6 +++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/dask_sql/physical/rex/core/call.py b/dask_sql/physical/rex/core/call.py
index 1903c8fd9..21f5d2ad6 100644
--- a/dask_sql/physical/rex/core/call.py
+++ b/dask_sql/physical/rex/core/call.py
@@ -535,6 +535,21 @@ def trim(self, s, search):
         return strip_call(search)
 
 
+class ReplaceOperation(Operation):
+    """The replace operator (replace occurrences of pattern in a string)"""
+
+    def __init__(self):
+        super().__init__(self.replace)
+
+    def replace(self, s, pat, repl):
+        if isinstance(s, str):
+            return s.replace(pat, repl)
+        elif isinstance(s, dd.Series):
+            return s.str.replace(pat, repl)
+        else:
+            raise TypeError("The string expression must be a string or a column name")
+
+
 class OverlayOperation(Operation):
     """The overlay operator (replace string according to positions)"""
 
@@ -965,6 +980,7 @@ class RexCallPlugin(BaseRexPlugin):
         "substr": SubStringOperation(),
         "substring": SubStringOperation(),
         "initcap": TensorScalarOperation(lambda x: x.str.title(), lambda x: x.title()),
+        "replace": ReplaceOperation(),
         # date/time operations
         "extract": ExtractOperation(),
         "localtime": Operation(lambda *args: pd.Timestamp.now()),
diff --git a/tests/integration/test_rex.py b/tests/integration/test_rex.py
index 655ff69de..b7d455fe3 100644
--- a/tests/integration/test_rex.py
+++ b/tests/integration/test_rex.py
@@ -522,7 +522,9 @@ def test_string_functions(c, gpu):
             SUBSTR(a, 3, 6) AS s,
             INITCAP(a) AS t,
             INITCAP(UPPER(a)) AS u,
-            INITCAP(LOWER(a)) AS v
+            INITCAP(LOWER(a)) AS v,
+            REPLACE(a, 'r', 'l') as w,
+            REPLACE('Another String', 'th', 'b') as x
         FROM
             {input_table}
         """
@@ -555,6 +557,8 @@ def test_string_functions(c, gpu):
             "t": ["A Normal String"],
             "u": ["A Normal String"],
             "v": ["A Normal String"],
+            "w": ["a nolmal stling"],
+            "x": ["Anober String"],
         }
     )
 

From 4207be0e1dc2c4754ae20e17cba84f91a38f5bc5 Mon Sep 17 00:00:00 2001
From: Chris Jarrett
Date: Thu, 27 Oct 2022 12:07:56 -0700
Subject: [PATCH 2/2] Add unit tests

---
 dask_sql/physical/rex/core/call.py | 11 +++++------
 tests/unit/test_call.py            |  3 +++
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/dask_sql/physical/rex/core/call.py b/dask_sql/physical/rex/core/call.py
index 21f5d2ad6..a66b178dc 100644
--- a/dask_sql/physical/rex/core/call.py
+++ b/dask_sql/physical/rex/core/call.py
@@ -542,12 +542,11 @@ def __init__(self):
         super().__init__(self.replace)
 
     def replace(self, s, pat, repl):
-        if isinstance(s, str):
-            return s.replace(pat, repl)
-        elif isinstance(s, dd.Series):
-            return s.str.replace(pat, repl)
-        else:
-            raise TypeError("The string expression must be a string or a column name")
+        if is_frame(s):
+            # SQL REPLACE is a literal match (like str.replace), never a regex
+            return s.str.replace(pat, repl, regex=False)
+
+        return s.replace(pat, repl)
 
 
 class OverlayOperation(Operation):
diff --git a/tests/unit/test_call.py b/tests/unit/test_call.py
index 0075c5cb5..05b116af8 100644
--- a/tests/unit/test_call.py
+++ b/tests/unit/test_call.py
@@ -182,6 +182,9 @@ def test_string_operations():
     assert ops_mapping["substring"](a, 2) == " normal string"
     assert ops_mapping["substring"](a, 2, 2) == " n"
     assert ops_mapping["initcap"](a) == "A Normal String"
+    assert ops_mapping["replace"](a, "nor", "") == "a mal string"
+    assert ops_mapping["replace"](a, "normal", "new") == "a new string"
+    assert ops_mapping["replace"]("hello", "", "w") == "whwewlwlwow"
 
 
 def test_dates():