From 93313711fa8dc1eee613fabc5f10d74fbd6ee807 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Aug 2023 09:57:12 -0700 Subject: [PATCH 1/6] Add new functions for dtype extraction that operate directly on pylibcudf types --- python/cudf/cudf/_lib/column.pyx | 4 +- python/cudf/cudf/_lib/pylibcudf/column.pxd | 5 ++ python/cudf/cudf/_lib/pylibcudf/column.pyx | 24 +++++++++ python/cudf/cudf/_lib/types.pyx | 57 ++++++++++++++++++++++ 4 files changed, 88 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 2b1fc14f398..50b56f1d484 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -40,6 +40,7 @@ from cudf._lib.types cimport ( ) from cudf._lib.null_mask import bitmask_allocation_size_bytes +from cudf._lib.types import dtype_from_pylibcudf_column cimport cudf._lib.cpp.types as libcudf_types cimport cudf._lib.cpp.unary as libcudf_unary @@ -607,8 +608,7 @@ cdef class Column: pylibcudf.Column A new pylibcudf.Column referencing the same data. """ - # TODO: Rewrite utility for dtype conversion to not need a column view. 
- dtype = dtype_from_column_view(col.view()) + dtype = dtype_from_pylibcudf_column(col) return cudf.core.column.build_column( data=as_buffer(col.data.obj) if col.data is not None else None, diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd index 2b08e6863a1..498714dc0c3 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd @@ -23,8 +23,13 @@ cdef class Column: size_type offset # children: List[Column] list children + size_type _num_children cdef column_view view(self) nogil @staticmethod cdef Column from_libcudf(unique_ptr[column] libcudf_col) + + cpdef DataType type(self) noexcept + cpdef Column child(self, size_type index) noexcept + cpdef size_type num_children(self) noexcept diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index be4eff4c49d..bfb010992c0 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx @@ -52,6 +52,7 @@ cdef class Column: self.null_count = null_count self.offset = offset self.children = children + self._num_children = len(children) cdef column_view view(self) nogil: """Generate a libcudf column_view to pass to libcudf algorithms. @@ -133,3 +134,26 @@ cdef class Column: 0, children, ) + + cpdef DataType type(self): + """The type of data in the column.""" + return self.data_type + + cpdef Column child(self, size_type index) noexcept: + """Get a child column of this column. + + Parameters + ---------- + index : size_type + The index of the child column to get. + + Returns + ------- + Column + The child column. 
+ """ + return self.children[index] + + cpdef size_type num_children(self) noexcept: + """The number of children of this column.""" + return self._num_children diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index c3eca8090a3..283ba8a8e72 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -129,6 +129,11 @@ LIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { TypeId.STRUCT: np.dtype("object"), } +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { + pylibcudf.TypeId(k).value: v + for k, v in LIBCUDF_TO_SUPPORTED_NUMPY_TYPES.items() +} + duration_unit_map = { TypeId.DURATION_SECONDS: "s", TypeId.DURATION_MILLISECONDS: "ms", @@ -275,3 +280,55 @@ cdef bool is_decimal_type_id(libcudf_types.type_id tid) except *: libcudf_types.type_id.DECIMAL64, libcudf_types.type_id.DECIMAL32, ) + + +def dtype_from_pylibcudf_lists_column(pylibcudf.Column col): + # TODO: Currently hardcoding the child column index for lists, should come + # up with a cleaner solution here. 
+ child = col.child(1) + + if child.type().id() == pylibcudf.TypeId.LIST: + return cudf.ListDtype(dtype_from_pylibcudf_lists_column(child)) + elif child.type().id() == pylibcudf.TypeId.EMPTY: + return cudf.ListDtype("int8") + else: + return cudf.ListDtype( + dtype_from_pylibcudf_column(child) + ) + + +def dtype_from_pylibcudf_structs_column(pylibcudf.Column col): + fields = { + str(i): dtype_from_pylibcudf_column(col.child(i)) + for i in range(col.num_children()) + } + return cudf.StructDtype(fields) + + +def dtype_from_pylibcudf_column(pylibcudf.Column col): + type_ = col.type() + tid = type_.id() + + if tid == pylibcudf.TypeId.LIST: + return dtype_from_pylibcudf_lists_column(col) + elif tid == pylibcudf.TypeId.STRUCT: + return dtype_from_pylibcudf_structs_column(col) + elif tid == pylibcudf.TypeId.DECIMAL64: + return cudf.Decimal64Dtype( + precision=cudf.Decimal64Dtype.MAX_PRECISION, + scale=-type_.scale() + ) + elif tid == pylibcudf.TypeId.DECIMAL32: + return cudf.Decimal32Dtype( + precision=cudf.Decimal32Dtype.MAX_PRECISION, + scale=-type_.scale() + ) + elif tid == pylibcudf.TypeId.DECIMAL128: + return cudf.Decimal128Dtype( + precision=cudf.Decimal128Dtype.MAX_PRECISION, + scale=-type_.scale() + ) + else: + return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ + (tid) + ] From 38149f82d6bc7871360c296c7775b615e037e159 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Aug 2023 10:45:13 -0700 Subject: [PATCH 2/6] Add accessor for list properties --- python/cudf/cudf/_lib/pylibcudf/column.pxd | 9 +++++++++ python/cudf/cudf/_lib/pylibcudf/column.pyx | 19 ++++++++++++++++++- python/cudf/cudf/_lib/types.pyx | 9 ++++----- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd index 498714dc0c3..20d11350904 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd @@ -33,3 +33,12 @@ cdef class Column: cpdef DataType type(self) 
noexcept cpdef Column child(self, size_type index) noexcept cpdef size_type num_children(self) noexcept + + cpdef list_view(self) + + +cdef class ListColumnView: + """Accessor for methods of a Column that are specific to lists.""" + cdef Column _column + cpdef child(self) + cpdef offsets(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index bfb010992c0..bb958bead9d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx @@ -9,7 +9,7 @@ from cudf._lib.cpp.column.column cimport column, column_contents from cudf._lib.cpp.types cimport size_type from .gpumemoryview cimport gpumemoryview -from .types cimport DataType +from .types cimport DataType, TypeId from .utils cimport int_to_bitmask_ptr, int_to_void_ptr @@ -157,3 +157,20 @@ cdef class Column: cpdef size_type num_children(self) noexcept: """The number of children of this column.""" return self._num_children + + cpdef list_view(self): + return ListColumnView(self) + + +cdef class ListColumnView: + """Accessor for methods of a Column that are specific to lists.""" + def __init__(self, Column col): + if col.type().id() != TypeId.LIST: + raise TypeError("Column is not a list type") + self._column = col + + cpdef child(self): + return self._column.child(1) + + cpdef offsets(self): + return self._column.child(0) diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 283ba8a8e72..d6396157af4 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -283,13 +283,12 @@ cdef bool is_decimal_type_id(libcudf_types.type_id tid) except *: def dtype_from_pylibcudf_lists_column(pylibcudf.Column col): - # TODO: Currently hardcoding the child column index for lists, should come - # up with a cleaner solution here. 
- child = col.child(1) + child = col.list_view().child() + tid = child.type().id() - if child.type().id() == pylibcudf.TypeId.LIST: + if tid == pylibcudf.TypeId.LIST: return cudf.ListDtype(dtype_from_pylibcudf_lists_column(child)) - elif child.type().id() == pylibcudf.TypeId.EMPTY: + elif tid == pylibcudf.TypeId.EMPTY: return cudf.ListDtype("int8") else: return cudf.ListDtype( From 9f7bea6e2c9f10b371b8cffd836b054663e98ed3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Aug 2023 11:43:46 -0700 Subject: [PATCH 3/6] Add method to access columns of a table --- python/cudf/cudf/_lib/pylibcudf/table.pxd | 4 +++- python/cudf/cudf/_lib/pylibcudf/table.pyx | 9 ++++++--- python/cudf/cudf/_lib/utils.pyx | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pxd b/python/cudf/cudf/_lib/pylibcudf/table.pxd index 4f189f2c398..95f197b13eb 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/table.pxd @@ -8,9 +8,11 @@ from cudf._lib.cpp.table.table_view cimport table_view cdef class Table: # List[pylibcudf.Column] - cdef object columns + cdef list _columns cdef table_view view(self) nogil @staticmethod cdef Table from_libcudf(unique_ptr[table] libcudf_tbl) + + cpdef list columns(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/table.pyx b/python/cudf/cudf/_lib/pylibcudf/table.pyx index db422dd420b..720f9815bd6 100644 --- a/python/cudf/cudf/_lib/pylibcudf/table.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/table.pyx @@ -21,7 +21,7 @@ cdef class Table: The columns in this table. """ def __init__(self, list columns): - self.columns = columns + self._columns = columns cdef table_view view(self) nogil: """Generate a libcudf table_view to pass to libcudf algorithms. @@ -31,11 +31,11 @@ cdef class Table: (even direct pylibcudf Cython users). """ # TODO: Make c_columns a class attribute that is updated along with - # self.columns whenever new columns are added or columns are removed. 
+ # self._columns whenever new columns are added or columns are removed. cdef vector[column_view] c_columns with gil: - for col in self.columns: + for col in self._columns: c_columns.push_back(( col).view()) return table_view(c_columns) @@ -57,3 +57,6 @@ cdef class Table: Column.from_libcudf(move(c_columns[i])) for i in range(c_columns.size()) ]) + + cpdef list columns(self): + return self._columns diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 8907143c289..4815c705958 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -261,7 +261,7 @@ cdef columns_from_pylibcudf_table(pylibcudf.Table tbl): A list of columns. """ cdef pylibcudf.Column plc - return [Column.from_pylibcudf(plc) for plc in tbl.columns] + return [Column.from_pylibcudf(plc) for plc in tbl.columns()] cdef data_from_unique_ptr( From 984a67a6c512b1dbecc61e196b4acd86b4b79822 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Aug 2023 11:56:45 -0700 Subject: [PATCH 4/6] Make pylibcudf columns more opaque --- python/cudf/cudf/_lib/column.pyx | 6 ++-- python/cudf/cudf/_lib/pylibcudf/column.pxd | 16 +++++---- python/cudf/cudf/_lib/pylibcudf/column.pyx | 38 +++++++++++++--------- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 50b56f1d484..36a5ccd9140 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -611,10 +611,12 @@ cdef class Column: dtype = dtype_from_pylibcudf_column(col) return cudf.core.column.build_column( - data=as_buffer(col.data.obj) if col.data is not None else None, + data=as_buffer(col.data().obj) if col.data() is not None else None, dtype=dtype, size=col.size, - mask=as_buffer(col.mask.obj) if col.mask is not None else None, + mask=as_buffer( + col.null_mask().obj + ) if col.null_mask() is not None else None, offset=col.offset, null_count=col.null_count, children=tuple([ diff --git 
a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd index 20d11350904..6d7d9f4019d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd @@ -15,14 +15,14 @@ cdef class Column: # TODO: Should we document these attributes? Should we mark them readonly? cdef: # Core data - DataType data_type - size_type size - gpumemoryview data - gpumemoryview mask - size_type null_count - size_type offset + DataType _data_type + size_type _size + gpumemoryview _data + gpumemoryview _mask + size_type _null_count + size_type _offset # children: List[Column] - list children + list _children size_type _num_children cdef column_view view(self) nogil @@ -33,6 +33,8 @@ cdef class Column: cpdef DataType type(self) noexcept cpdef Column child(self, size_type index) noexcept cpdef size_type num_children(self) noexcept + cpdef gpumemoryview data(self) + cpdef gpumemoryview null_mask(self) cpdef list_view(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index bb958bead9d..07c8d162afc 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx @@ -45,13 +45,13 @@ cdef class Column: gpumemoryview mask, size_type null_count, size_type offset, list children ): - self.data_type = data_type - self.size = size - self.data = data - self.mask = mask - self.null_count = null_count - self.offset = offset - self.children = children + self._data_type = data_type + self._size = size + self._data = data + self._mask = mask + self._null_count = null_count + self._offset = offset + self._children = children self._num_children = len(children) cdef column_view view(self) nogil: @@ -64,16 +64,16 @@ cdef class Column: cdef const void * data = NULL cdef const bitmask_type * null_mask = NULL - if self.data is not None: - data = int_to_void_ptr(self.data.ptr) - if self.mask is not None: - null_mask = 
int_to_bitmask_ptr(self.mask.ptr) + if self._data is not None: + data = int_to_void_ptr(self._data.ptr) + if self._mask is not None: + null_mask = int_to_bitmask_ptr(self._mask.ptr) # TODO: Check if children can ever change. If not, this could be # computed once in the constructor and always be reused. cdef vector[column_view] c_children with gil: - if self.children is not None: + if self._children is not None: for child in self.children: # Need to cast to Column here so that Cython knows that # `view` returns a typed object, not a Python object. We @@ -87,8 +87,8 @@ cdef class Column: c_children.push_back(( child).view()) return column_view( - self.data_type.c_obj, self.size, data, null_mask, - self.null_count, self.offset, c_children + self._data_type.c_obj, self._size, data, null_mask, + self._null_count, self._offset, c_children ) @staticmethod @@ -137,7 +137,7 @@ cdef class Column: cpdef DataType type(self): """The type of data in the column.""" - return self.data_type + return self._data_type cpdef Column child(self, size_type index) noexcept: """Get a child column of this column. @@ -152,7 +152,7 @@ cdef class Column: Column The child column. 
""" - return self.children[index] + return self._children[index] cpdef size_type num_children(self) noexcept: """The number of children of this column.""" @@ -161,6 +161,12 @@ cdef class Column: cpdef list_view(self): return ListColumnView(self) + cpdef gpumemoryview data(self): + return self._data + + cpdef gpumemoryview null_mask(self): + return self._mask + cdef class ListColumnView: """Accessor for methods of a Column that are specific to lists.""" From c0816e9c8c7e84a861eacfa53ceb8442fd87995a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Aug 2023 11:58:03 -0700 Subject: [PATCH 5/6] Use pylibcudf only in pure Python mode --- python/cudf/cudf/_lib/column.pxd | 3 +-- python/cudf/cudf/_lib/column.pyx | 13 +++++-------- python/cudf/cudf/_lib/copying.pyx | 4 ++-- python/cudf/cudf/_lib/types.pxd | 3 +-- python/cudf/cudf/_lib/types.pyx | 9 ++++----- python/cudf/cudf/_lib/utils.pxd | 3 +-- python/cudf/cudf/_lib/utils.pyx | 4 +--- 7 files changed, 15 insertions(+), 24 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd index fbdf6288538..7ffb55a6cc6 100644 --- a/python/cudf/cudf/_lib/column.pxd +++ b/python/cudf/cudf/_lib/column.pxd @@ -7,7 +7,6 @@ from libcpp.memory cimport unique_ptr from rmm._lib.device_buffer cimport device_buffer -from cudf._lib cimport pylibcudf from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view from cudf._lib.cpp.types cimport size_type @@ -30,7 +29,7 @@ cdef class Column: cdef column_view _view(self, size_type null_count) except * cdef column_view view(self) except * cdef mutable_column_view mutable_view(self) except * - cpdef pylibcudf.Column to_pylibcudf(self, mode: Literal["read", "write"]) + cpdef to_pylibcudf(self, mode: Literal["read", "write"]) @staticmethod cdef Column from_unique_ptr( diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 36a5ccd9140..a320f1c3425 100644 
--- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -11,9 +11,6 @@ import rmm import cudf import cudf._lib as libcudf from cudf._lib import pylibcudf - -from cudf._lib cimport pylibcudf - from cudf.api.types import is_categorical_dtype, is_datetime64tz_dtype from cudf.core.buffer import ( Buffer, @@ -447,7 +444,7 @@ cdef class Column: # underlying buffers as exposed before this function can itself be exposed # publicly. User requests to convert to pylibcudf must assume that the # data may be modified afterwards. - cpdef pylibcudf.Column to_pylibcudf(self, mode: Literal["read", "write"]): + cpdef to_pylibcudf(self, mode: Literal["read", "write"]): """Convert this Column to a pylibcudf.Column. This function will generate a pylibcudf Column pointing to the same @@ -477,9 +474,9 @@ cdef class Column: else: col = self - cdef pylibcudf.DataType dtype = dtype_to_pylibcudf_type(col.dtype) + dtype = dtype_to_pylibcudf_type(col.dtype) - cdef pylibcudf.gpumemoryview data = None + data = None if col.base_data is not None: cai = cuda_array_interface_wrapper( ptr=col.base_data.get_ptr(mode=mode), @@ -488,7 +485,7 @@ cdef class Column: ) data = pylibcudf.gpumemoryview(cai) - cdef pylibcudf.gpumemoryview mask = None + mask = None if self.nullable: # TODO: Are we intentionally use self's mask instead of col's? # Where is the mask stored for categoricals? @@ -587,7 +584,7 @@ cdef class Column: # TODO: Actually support exposed data pointers. @staticmethod def from_pylibcudf( - pylibcudf.Column col, bint data_ptr_exposed=False + col, bint data_ptr_exposed=False ): """Create a Column from a pylibcudf.Column. 
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 944a80158df..f57bc15ed57 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -11,9 +11,9 @@ from libcpp.vector cimport vector from rmm._lib.device_buffer cimport DeviceBuffer import cudf +from cudf._lib import pylibcudf from cudf.core.buffer import Buffer, acquire_spill_lock, as_buffer -from cudf._lib cimport pylibcudf from cudf._lib.column cimport Column from cudf._lib.scalar import as_device_scalar @@ -174,7 +174,7 @@ def gather( Column gather_map, bool nullify=False ): - cdef pylibcudf.Table tbl = pylibcudf.copying.gather( + tbl = pylibcudf.copying.gather( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in columns]), gather_map.to_pylibcudf(mode="read"), pylibcudf.copying.OutOfBoundsPolicy.NULLIFY if nullify diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd index 1eeaa23c260..a95db84ceff 100644 --- a/python/cudf/cudf/_lib/types.pxd +++ b/python/cudf/cudf/_lib/types.pxd @@ -4,7 +4,6 @@ from libc.stdint cimport int32_t from libcpp cimport bool cimport cudf._lib.cpp.types as libcudf_types -from cudf._lib cimport pylibcudf from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view @@ -18,5 +17,5 @@ ctypedef bool underlying_type_t_null_policy cdef dtype_from_column_view(column_view cv) cdef libcudf_types.data_type dtype_to_data_type(dtype) except * -cpdef pylibcudf.DataType dtype_to_pylibcudf_type(dtype) +cpdef dtype_to_pylibcudf_type(dtype) cdef bool is_decimal_type_id(libcudf_types.type_id tid) except * diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index d6396157af4..8594e37ac4a 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -17,7 +17,6 @@ from cudf._lib.types cimport ( import cudf from cudf._lib import pylibcudf -from cudf._lib cimport pylibcudf size_type_dtype = 
np.dtype("int32") @@ -257,7 +256,7 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *: else: return libcudf_types.data_type(tid) -cpdef pylibcudf.DataType dtype_to_pylibcudf_type(dtype): +cpdef dtype_to_pylibcudf_type(dtype): if cudf.api.types.is_list_dtype(dtype): return pylibcudf.DataType(pylibcudf.TypeId.LIST) elif cudf.api.types.is_struct_dtype(dtype): @@ -282,7 +281,7 @@ cdef bool is_decimal_type_id(libcudf_types.type_id tid) except *: ) -def dtype_from_pylibcudf_lists_column(pylibcudf.Column col): +def dtype_from_pylibcudf_lists_column(col): child = col.list_view().child() tid = child.type().id() @@ -296,7 +295,7 @@ def dtype_from_pylibcudf_lists_column(pylibcudf.Column col): ) -def dtype_from_pylibcudf_structs_column(pylibcudf.Column col): +def dtype_from_pylibcudf_structs_column(col): fields = { str(i): dtype_from_pylibcudf_column(col.child(i)) for i in range(col.num_children()) @@ -304,7 +303,7 @@ def dtype_from_pylibcudf_structs_column(pylibcudf.Column col): return cudf.StructDtype(fields) -def dtype_from_pylibcudf_column(pylibcudf.Column col): +def dtype_from_pylibcudf_column(col): type_ = col.type() tid = type_.id() diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd index f2cdc110b64..653fa8f2b8b 100644 --- a/python/cudf/cudf/_lib/utils.pxd +++ b/python/cudf/cudf/_lib/utils.pxd @@ -4,7 +4,6 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector -from cudf._lib cimport pylibcudf from cudf._lib.cpp.column.column cimport column_view from cudf._lib.cpp.table.table cimport table, table_view @@ -19,4 +18,4 @@ cdef table_view table_view_from_columns(columns) except * cdef table_view table_view_from_table(tbl, ignore_index=*) except* cdef columns_from_unique_ptr(unique_ptr[table] c_tbl) cdef columns_from_table_view(table_view tv, object owners) -cdef columns_from_pylibcudf_table(pylibcudf.Table table) +cdef columns_from_pylibcudf_table(tbl) diff --git 
a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 4815c705958..03982a58517 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -11,7 +11,6 @@ from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector -from cudf._lib cimport pylibcudf from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column, column_view from cudf._lib.cpp.table.table cimport table @@ -247,7 +246,7 @@ cdef columns_from_unique_ptr( return columns -cdef columns_from_pylibcudf_table(pylibcudf.Table tbl): +cdef columns_from_pylibcudf_table(tbl): """Convert a pylibcudf table into list of columns. Parameters @@ -260,7 +259,6 @@ cdef columns_from_pylibcudf_table(pylibcudf.Table tbl): list[Column] A list of columns. """ - cdef pylibcudf.Column plc return [Column.from_pylibcudf(plc) for plc in tbl.columns()] From c18ced3f52884b0dfe4388d5279a896d84275c81 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 17 Aug 2023 16:12:56 -0700 Subject: [PATCH 6/6] Fix remaining issues --- python/cudf/cudf/_lib/column.pyx | 8 ++++---- python/cudf/cudf/_lib/pylibcudf/column.pxd | 10 +++++++--- python/cudf/cudf/_lib/pylibcudf/column.pyx | 17 ++++++++++++++--- .../cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd | 7 +++++-- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index a320f1c3425..4db3761b1b8 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -610,15 +610,15 @@ cdef class Column: return cudf.core.column.build_column( data=as_buffer(col.data().obj) if col.data() is not None else None, dtype=dtype, - size=col.size, + size=col.size(), mask=as_buffer( col.null_mask().obj ) if col.null_mask() is not None else None, - offset=col.offset, - null_count=col.null_count, + offset=col.offset(), + null_count=col.null_count(), children=tuple([ Column.from_pylibcudf(child) - for child 
in col.children + for child in col.children() ]) ) diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd index 6d7d9f4019d..2af87db5b03 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd @@ -30,11 +30,15 @@ cdef class Column: @staticmethod cdef Column from_libcudf(unique_ptr[column] libcudf_col) - cpdef DataType type(self) noexcept - cpdef Column child(self, size_type index) noexcept - cpdef size_type num_children(self) noexcept + cpdef DataType type(self) + cpdef Column child(self, size_type index) + cpdef size_type num_children(self) + cpdef size_type size(self) + cpdef size_type null_count(self) + cpdef size_type offset(self) cpdef gpumemoryview data(self) cpdef gpumemoryview null_mask(self) + cpdef list children(self) cpdef list_view(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index 07c8d162afc..d9b2ca98ead 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx @@ -74,7 +74,7 @@ cdef class Column: cdef vector[column_view] c_children with gil: if self._children is not None: - for child in self.children: + for child in self._children: # Need to cast to Column here so that Cython knows that # `view` returns a typed object, not a Python object. We # cannot use a typed variable for `child` because cdef @@ -139,7 +139,7 @@ cdef class Column: """The type of data in the column.""" return self._data_type - cpdef Column child(self, size_type index) noexcept: + cpdef Column child(self, size_type index): """Get a child column of this column. 
Parameters @@ -154,7 +154,7 @@ cdef class Column: """ return self._children[index] - cpdef size_type num_children(self) noexcept: + cpdef size_type num_children(self): """The number of children of this column.""" return self._num_children @@ -167,6 +167,17 @@ cdef class Column: cpdef gpumemoryview null_mask(self): return self._mask + cpdef size_type size(self): + return self._size + + cpdef size_type offset(self): + return self._offset + + cpdef size_type null_count(self): + return self._null_count + + cpdef list children(self): + return self._children cdef class ListColumnView: """Accessor for methods of a Column that are specific to lists.""" diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd index 5e656744a8c..713697bd139 100644 --- a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pxd @@ -2,5 +2,8 @@ cdef class gpumemoryview: - cdef Py_ssize_t ptr - cdef object obj + # TODO: Eventually probably want to make this opaque, but for now it's fine + # to treat this object as something like a POD struct + cdef readonly: + Py_ssize_t ptr + object obj