From 984a67a6c512b1dbecc61e196b4acd86b4b79822 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Thu, 17 Aug 2023 11:56:45 -0700
Subject: [PATCH] Make pylibcudf columns more opaque

Rename the cdef attributes of pylibcudf.Column to underscore-prefixed
names (_data_type, _size, _data, _mask, _null_count, _offset,
_children) so that they read as internal state of the class.

Add two cpdef accessors, Column.data() and Column.null_mask(), which
return the gpumemoryview objects stored in _data and _mask, and switch
cudf._lib.column.Column.from_pylibcudf over to calling them instead of
reading the attributes directly.

Column.view() now both checks and iterates over self._children; the
loop previously still referred to the old attribute name.
---
Review notes (this section is ignored when the patch is applied):

- NOTE(review): cudf/_lib/column.pyx still reads col.size, col.offset
  and col.null_count as unchanged context lines, while this patch
  renames the matching cdef attributes to _size, _offset and
  _null_count. No size()/offset()/null_count() accessors are visible in
  these hunks -- confirm those reads still resolve.
- NOTE(review): column.pxd declares `cpdef DataType type(self) noexcept`
  but column.pyx defines `cpdef DataType type(self):` without
  `noexcept` (both are untouched context) -- confirm this is intended.
- The `index` lines are kept from the original patch; the post-image
  blob ids no longer match after the fix-ups in Column.view() and the
  added docstrings.

 python/cudf/cudf/_lib/column.pyx           |  6 ++--
 python/cudf/cudf/_lib/pylibcudf/column.pxd | 16 +++++----
 python/cudf/cudf/_lib/pylibcudf/column.pyx | 42 +++++++++++++---------
 3 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 50b56f1d484..36a5ccd9140 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -611,10 +611,12 @@ cdef class Column:
         dtype = dtype_from_pylibcudf_column(col)
 
         return cudf.core.column.build_column(
-            data=as_buffer(col.data.obj) if col.data is not None else None,
+            data=as_buffer(col.data().obj) if col.data() is not None else None,
             dtype=dtype,
             size=col.size,
-            mask=as_buffer(col.mask.obj) if col.mask is not None else None,
+            mask=as_buffer(
+                col.null_mask().obj
+            ) if col.null_mask() is not None else None,
             offset=col.offset,
             null_count=col.null_count,
             children=tuple([
diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd
index 20d11350904..6d7d9f4019d 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd
@@ -15,14 +15,14 @@ cdef class Column:
     # TODO: Should we document these attributes? Should we mark them readonly?
     cdef:
         # Core data
-        DataType data_type
-        size_type size
-        gpumemoryview data
-        gpumemoryview mask
-        size_type null_count
-        size_type offset
+        DataType _data_type
+        size_type _size
+        gpumemoryview _data
+        gpumemoryview _mask
+        size_type _null_count
+        size_type _offset
         # children: List[Column]
-        list children
+        list _children
         size_type _num_children
 
     cdef column_view view(self) nogil
@@ -33,6 +33,8 @@ cdef class Column:
     cpdef DataType type(self) noexcept
     cpdef Column child(self, size_type index) noexcept
     cpdef size_type num_children(self) noexcept
+    cpdef gpumemoryview data(self)
+    cpdef gpumemoryview null_mask(self)
 
     cpdef list_view(self)
 
diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
index bb958bead9d..07c8d162afc 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -45,13 +45,13 @@ cdef class Column:
         gpumemoryview mask, size_type null_count, size_type offset,
         list children
     ):
-        self.data_type = data_type
-        self.size = size
-        self.data = data
-        self.mask = mask
-        self.null_count = null_count
-        self.offset = offset
-        self.children = children
+        self._data_type = data_type
+        self._size = size
+        self._data = data
+        self._mask = mask
+        self._null_count = null_count
+        self._offset = offset
+        self._children = children
         self._num_children = len(children)
 
     cdef column_view view(self) nogil:
@@ -64,16 +64,16 @@ cdef class Column:
         cdef const void * data = NULL
         cdef const bitmask_type * null_mask = NULL
 
-        if self.data is not None:
-            data = int_to_void_ptr(self.data.ptr)
-        if self.mask is not None:
-            null_mask = int_to_bitmask_ptr(self.mask.ptr)
+        if self._data is not None:
+            data = int_to_void_ptr(self._data.ptr)
+        if self._mask is not None:
+            null_mask = int_to_bitmask_ptr(self._mask.ptr)
 
         # TODO: Check if children can ever change. If not, this could be
         # computed once in the constructor and always be reused.
         cdef vector[column_view] c_children
         with gil:
-            if self.children is not None:
-                for child in self.children:
+            if self._children is not None:
+                for child in self._children:
                     # Need to cast to Column here so that Cython knows that
                     # `view` returns a typed object, not a Python object. We
@@ -87,8 +87,8 @@ cdef class Column:
                     c_children.push_back((<Column> child).view())
 
         return column_view(
-            self.data_type.c_obj, self.size, data, null_mask,
-            self.null_count, self.offset, c_children
+            self._data_type.c_obj, self._size, data, null_mask,
+            self._null_count, self._offset, c_children
         )
 
     @staticmethod
@@ -137,7 +137,7 @@ cdef class Column:
 
     cpdef DataType type(self):
         """The type of data in the column."""
-        return self.data_type
+        return self._data_type
 
     cpdef Column child(self, size_type index) noexcept:
         """Get a child column of this column.
@@ -152,7 +152,7 @@ cdef class Column:
         Column
             The child column.
         """
-        return self.children[index]
+        return self._children[index]
 
     cpdef size_type num_children(self) noexcept:
         """The number of children of this column."""
@@ -161,6 +161,14 @@ cdef class Column:
     cpdef list_view(self):
         return ListColumnView(self)
 
+    cpdef gpumemoryview data(self):
+        """The data buffer of the column (may be None)."""
+        return self._data
+
+    cpdef gpumemoryview null_mask(self):
+        """The null mask of the column (may be None)."""
+        return self._mask
+
 
 cdef class ListColumnView:
     """Accessor for methods of a Column that are specific to lists."""