diff --git a/python/cudf/cudf/_lib/io/utils.pxd b/python/cudf/cudf/_lib/io/utils.pxd index 96504ebdd66..9b8bab012e2 100644 --- a/python/cudf/cudf/_lib/io/utils.pxd +++ b/python/cudf/cudf/_lib/io/utils.pxd @@ -13,9 +13,6 @@ from pylibcudf.libcudf.io.types cimport ( from cudf._lib.column cimport Column -cdef sink_info make_sinks_info( - list src, vector[unique_ptr[data_sink]] & data) except* -cdef sink_info make_sink_info(src, unique_ptr[data_sink] & data) except* cdef add_df_col_struct_names( df, child_names_dict @@ -26,7 +23,8 @@ cdef update_col_struct_field_names( ) cdef update_struct_field_names( table, - vector[column_name_info]& schema_info) + vector[column_name_info]& schema_info +) cdef Column update_column_struct_field_names( Column col, column_name_info& info diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index f23980b387a..df4675be599 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -1,97 +1,16 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from cpython.buffer cimport PyBUF_READ -from cpython.memoryview cimport PyMemoryView_FromMemory -from libcpp.memory cimport unique_ptr + from libcpp.string cimport string -from libcpp.utility cimport move + from libcpp.vector cimport vector -from pylibcudf.libcudf.io.data_sink cimport data_sink -from pylibcudf.libcudf.io.types cimport ( - column_name_info, - sink_info, -) +from pylibcudf.libcudf.io.types cimport column_name_info from cudf._lib.column cimport Column -import codecs -import io -import os - from cudf.core.dtypes import StructDtype -# Converts the Python sink input to libcudf IO sink_info. -cdef sink_info make_sinks_info( - list src, vector[unique_ptr[data_sink]] & sink -) except*: - cdef vector[data_sink *] data_sinks - cdef vector[string] paths - if isinstance(src[0], io.StringIO): - data_sinks.reserve(len(src)) - for s in src: - sink.push_back(unique_ptr[data_sink](new iobase_data_sink(s))) - data_sinks.push_back(sink.back().get()) - return sink_info(data_sinks) - elif isinstance(src[0], io.TextIOBase): - data_sinks.reserve(len(src)) - for s in src: - # Files opened in text mode expect writes to be str rather than - # bytes, which requires conversion from utf-8. If the underlying - # buffer is utf-8, we can bypass this conversion by writing - # directly to it. - if codecs.lookup(s.encoding).name not in {"utf-8", "ascii"}: - raise NotImplementedError(f"Unsupported encoding {s.encoding}") - sink.push_back( - unique_ptr[data_sink](new iobase_data_sink(s.buffer)) - ) - data_sinks.push_back(sink.back().get()) - return sink_info(data_sinks) - elif isinstance(src[0], io.IOBase): - data_sinks.reserve(len(src)) - for s in src: - sink.push_back(unique_ptr[data_sink](new iobase_data_sink(s))) - data_sinks.push_back(sink.back().get()) - return sink_info(data_sinks) - elif isinstance(src[0], (basestring, os.PathLike)): - paths.reserve(len(src)) - for s in src: - paths.push_back( os.path.expanduser(s).encode()) - return sink_info(move(paths)) - else: - raise TypeError("Unrecognized input type: {}".format(type(src))) - - -cdef sink_info make_sink_info(src, unique_ptr[data_sink] & sink) except*: - cdef vector[unique_ptr[data_sink]] datasinks - cdef sink_info info = make_sinks_info([src], datasinks) - if not datasinks.empty(): - sink.swap(datasinks[0]) - return info - - -# Adapts a python io.IOBase object as a libcudf IO data_sink. This lets you -# write from cudf to any python file-like object (File/BytesIO/SocketIO etc) -cdef cppclass iobase_data_sink(data_sink): - object buf - - iobase_data_sink(object buf_): - this.buf = buf_ - - void host_write(const void * data, size_t size) with gil: - if isinstance(buf, io.StringIO): - buf.write(PyMemoryView_FromMemory(data, size, PyBUF_READ) - .tobytes().decode()) - else: - buf.write(PyMemoryView_FromMemory(data, size, PyBUF_READ)) - - void flush() with gil: - buf.flush() - - size_t bytes_written() with gil: - return buf.tell() - - cdef add_df_col_struct_names(df, child_names_dict): for name, child_names in child_names_dict.items(): col = df._data[name]