From 77c02890bae7fff9d648920f95e40ee7a7861da6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 18 Nov 2024 11:28:36 -0800 Subject: [PATCH 1/3] Remove cudf._lib.transpose in favor of inlining pylibcudf --- python/cudf/cudf/_lib/CMakeLists.txt | 1 - python/cudf/cudf/_lib/__init__.py | 2 -- python/cudf/cudf/_lib/transpose.pyx | 18 ------------------ python/cudf/cudf/core/dataframe.py | 9 ++++++++- 4 files changed, 8 insertions(+), 22 deletions(-) delete mode 100644 python/cudf/cudf/_lib/transpose.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 41a7db2285a..5275f8afe5c 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -48,7 +48,6 @@ set(cython_sources text.pyx timezone.pyx transform.pyx - transpose.pyx types.pyx unary.pyx utils.pyx diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 57df6899a22..08e78d0676e 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -34,8 +34,6 @@ strings_udf, text, timezone, - transpose, - unary, ) MAX_COLUMN_SIZE = np.iinfo(np.int32).max diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx deleted file mode 100644 index 995d278cb88..00000000000 --- a/python/cudf/cudf/_lib/transpose.pyx +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -import pylibcudf as plc - -from cudf._lib.column cimport Column - - -def transpose(list source_columns): - """Transpose m n-row columns into n m-row columns - """ - input_table = plc.table.Table( - [col.to_pylibcudf(mode="read") for col in source_columns] - ) - result_table = plc.transpose.transpose(input_table) - return [ - Column.from_pylibcudf(col, data_ptr_exposed=True) - for col in result_table.columns() - ] diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bf1c39b23da..7b18e405b7c 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4089,7 +4089,14 @@ def transpose(self): if any(c.dtype != source_columns[0].dtype for c in source_columns): raise ValueError("Columns must all have the same dtype") - result_columns = libcudf.transpose.transpose(source_columns) + input_table = plc.table.Table( + [col.to_pylibcudf(mode="read") for col in source_columns] + ) + result_table = plc.transpose.transpose(input_table) + result_columns = [ + libcudf.column.Column.from_pylibcudf(col, data_ptr_exposed=True) + for col in result_table.columns() + ] if isinstance(source_dtype, cudf.CategoricalDtype): result_columns = [ From 2abc5f4f1fde43c1211468ba7577ecfeb38965bd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 19 Nov 2024 16:22:11 -0800 Subject: [PATCH 2/3] Update python/cudf/cudf/core/dataframe.py Co-authored-by: Matthew Murray <41342305+Matt711@users.noreply.github.com> --- python/cudf/cudf/core/dataframe.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 7b18e405b7c..f7fd2238a50 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4089,10 +4089,11 @@ def transpose(self): if any(c.dtype != source_columns[0].dtype for c in source_columns): raise ValueError("Columns must all have the same dtype") - input_table = plc.table.Table( - [col.to_pylibcudf(mode="read") for col in source_columns] + result_table = plc.transpose.transpose( + plc.table.Table( + [col.to_pylibcudf(mode="read") for col in source_columns] + ) ) - result_table = plc.transpose.transpose(input_table) result_columns = [ libcudf.column.Column.from_pylibcudf(col, data_ptr_exposed=True) for col in result_table.columns() From cd3b19b313805374c3d1a5d19f07593fc449e3e0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:22:40 -0800 Subject: [PATCH 3/3] Add back plc import --- python/cudf/cudf/core/dataframe.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f7fd2238a50..0377d1358f0 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -26,6 +26,8 @@ from pandas.io.formats.printing import pprint_thing from typing_extensions import Self, assert_never +import pylibcudf as plc + import cudf import cudf.core.common from cudf import _lib as libcudf