From ab0b133594b217d5aad605491fedccef7137d936 Mon Sep 17 00:00:00 2001
From: Christopher Harris <xixonia@gmail.com>
Date: Sat, 19 Mar 2022 14:19:25 -0500
Subject: [PATCH 1/7] add StringIO support to read_text

---
 python/cudf/cudf/_lib/text.pyx      | 18 ++++++++++++++----
 python/cudf/cudf/tests/test_text.py | 15 ++++++++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx
index 9f33f32bdaf..e5940593a11 100644
--- a/python/cudf/cudf/_lib/text.pyx
+++ b/python/cudf/cudf/_lib/text.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
+from io import StringIO
+
 import cudf
 
 from cython.operator cimport dereference
@@ -26,14 +28,22 @@ def read_text(object filepaths_or_buffers,
     --------
     cudf.io.text.read_text
     """
-    cdef string filename = filepaths_or_buffers.encode()
+    cdef string filepath
+    cdef string data
     cdef string delim = delimiter.encode()
 
     cdef unique_ptr[data_chunk_source] datasource
     cdef unique_ptr[column] c_col
 
-    with nogil:
-        datasource = move(make_source_from_file(filename))
-        c_col = move(multibyte_split(dereference(datasource), delim))
+    if isinstance(filepaths_or_buffers, (StringIO)):
+        data = filepaths_or_buffers.read().encode()
+        with nogil:
+            datasource = move(make_source(data))
+            c_col = move(multibyte_split(dereference(datasource), delim))
+    else:
+        filepath = filepaths_or_buffers.encode()
+        with nogil:
+            datasource = move(make_source_from_file(filepath))
+            c_col = move(multibyte_split(dereference(datasource), delim))
 
     return {None: Column.from_unique_ptr(move(c_col))}
diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index a447a60c709..a507432d0e2 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2019, NVIDIA CORPORATION.
+
+from io import StringIO# Copyright (c) 2019, NVIDIA CORPORATION.
 
 import numpy as np
 import pytest
@@ -778,3 +779,15 @@ def test_read_text(datadir):
     actual = cudf.read_text(chess_file, delimiter=delimiter)
 
     assert_eq(expected, actual)
+
+
+def test_read_text_in_memory(datadir):
+    delimiter = "::"
+
+    # Since Python split removes the delimiter and read_text does
+    # not we need to add it back to the 'content'
+    expected = cudf.Series(["x::", "y::", "z"])
+
+    actual = cudf.read_text(StringIO("x::y::z"), delimiter=delimiter)
+
+    assert_eq(expected, actual)

From 3fa26da4c0ff184f33d0f87439f4edf057a016cc Mon Sep 17 00:00:00 2001
From: Christopher Harris <xixonia@gmail.com>
Date: Sat, 19 Mar 2022 14:25:15 -0500
Subject: [PATCH 2/7] fix copyright

---
 python/cudf/cudf/tests/test_text.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index a507432d0e2..a1f49bec932 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -1,5 +1,7 @@
 
-from io import StringIO# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+
+from io import StringIO
 
 import numpy as np
 import pytest
@@ -344,7 +346,7 @@ def test_character_tokenize_series():
             "w",
             "o",
             ":",
-            "t",
+            "t",already-in-memory 
             "h",
             "r",
             "e",

From af0a0b1a48004214c3cc5ba9e73faf4ba4007cbb Mon Sep 17 00:00:00 2001
From: Christopher Harris <xixonia@gmail.com>
Date: Sat, 19 Mar 2022 15:11:53 -0500
Subject: [PATCH 3/7] remove leading blank line from test_text.py

---
 python/cudf/cudf/tests/test_text.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index a1f49bec932..6435630974f 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -1,4 +1,3 @@
-
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 from io import StringIO

From 61e8f54caa87e545787dbd73ac0d98f316d8adaf Mon Sep 17 00:00:00 2001
From: Christopher Harris <xixonia@gmail.com>
Date: Sat, 19 Mar 2022 15:12:43 -0500
Subject: [PATCH 4/7] remove stray text from test_character_tokenize_series

---
 python/cudf/cudf/tests/test_text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index 6435630974f..dd516e46ebf 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -345,7 +345,7 @@ def test_character_tokenize_series():
             "w",
             "o",
             ":",
-            "t",already-in-memory 
+            "t",
             "h",
             "r",
             "e",

From 197bd73f5bb2cd5d77d274e7c1690c02dd69756c Mon Sep 17 00:00:00 2001
From: Christopher Harris <xixonia@gmail.com>
Date: Mon, 21 Mar 2022 22:45:44 -0500
Subject: [PATCH 5/7] remove unnecessary file

---
 python/cudf/cudf/tests/data/text/temp.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 python/cudf/cudf/tests/data/text/temp.txt

diff --git a/python/cudf/cudf/tests/data/text/temp.txt b/python/cudf/cudf/tests/data/text/temp.txt
deleted file mode 100644
index 860e21333e6..00000000000
--- a/python/cudf/cudf/tests/data/text/temp.txt
+++ /dev/null
@@ -1 +0,0 @@
-<generator object test_read_text_byte_range_large.<locals>.<genexpr> at 0x7f1e5aa306d0>
\ No newline at end of file

From 68eb7e143bd5e1d0b5615de741f6cbcee69514bd Mon Sep 17 00:00:00 2001
From: Christopher Harris <xixonia@gmail.com>
Date: Tue, 22 Mar 2022 14:28:15 -0500
Subject: [PATCH 6/7] support TextIOBase in read_text

---
 python/cudf/cudf/_lib/text.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx
index 671a85b90ee..868574be187 100644
--- a/python/cudf/cudf/_lib/text.pyx
+++ b/python/cudf/cudf/_lib/text.pyx
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-from io import StringIO
+from io import TextIOBase
 
 import cudf
 
@@ -39,7 +39,7 @@ def read_text(object filepaths_or_buffers,
     cdef size_t c_byte_range_size
     cdef byte_range_info c_byte_range
 
-    if isinstance(filepaths_or_buffers, (StringIO)):
+    if isinstance(filepaths_or_buffers, TextIOBase):
         datasource = move(make_source(filepaths_or_buffers.read().encode()))
     else:
         datasource = move(make_source_from_file(filepaths_or_buffers.encode()))

From f381414d345e8471a5c62d0e5530bac274f70559 Mon Sep 17 00:00:00 2001
From: Christopher Harris <xixonia@gmail.com>
Date: Wed, 23 Mar 2022 11:02:21 -0500
Subject: [PATCH 7/7] inline the delimiter argument in test_text read_text test

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 python/cudf/cudf/tests/test_text.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index bb5e51636ab..c332924fd8b 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -834,12 +834,10 @@ def test_read_text_byte_range_large(datadir):
 
 
 def test_read_text_in_memory(datadir):
-    delimiter = "::"
-
     # Since Python split removes the delimiter and read_text does
     # not we need to add it back to the 'content'
     expected = cudf.Series(["x::", "y::", "z"])
 
-    actual = cudf.read_text(StringIO("x::y::z"), delimiter=delimiter)
+    actual = cudf.read_text(StringIO("x::y::z"), delimiter="::")
 
     assert_eq(expected, actual)