Skip to content

Commit

Permalink
Make pylibcudf columns more opaque
Browse files: browse the repository at this point in the history
  • Loading branch information
vyasr committed Aug 17, 2023
1 parent 9f7bea6 commit 984a67a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 25 deletions.
6 changes: 4 additions & 2 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -611,10 +611,12 @@ cdef class Column:
dtype = dtype_from_pylibcudf_column(col)

return cudf.core.column.build_column(
data=as_buffer(col.data.obj) if col.data is not None else None,
data=as_buffer(col.data().obj) if col.data() is not None else None,
dtype=dtype,
size=col.size,
mask=as_buffer(col.mask.obj) if col.mask is not None else None,
mask=as_buffer(
col.null_mask().obj
) if col.null_mask() is not None else None,
offset=col.offset,
null_count=col.null_count,
children=tuple([
Expand Down
16 changes: 9 additions & 7 deletions python/cudf/cudf/_lib/pylibcudf/column.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ cdef class Column:
# TODO: Should we document these attributes? Should we mark them readonly?
cdef:
# Core data
DataType data_type
size_type size
gpumemoryview data
gpumemoryview mask
size_type null_count
size_type offset
DataType _data_type
size_type _size
gpumemoryview _data
gpumemoryview _mask
size_type _null_count
size_type _offset
# children: List[Column]
list children
list _children
size_type _num_children

cdef column_view view(self) nogil
Expand All @@ -33,6 +33,8 @@ cdef class Column:
cpdef DataType type(self) noexcept
cpdef Column child(self, size_type index) noexcept
cpdef size_type num_children(self) noexcept
cpdef gpumemoryview data(self)
cpdef gpumemoryview null_mask(self)

cpdef list_view(self)

Expand Down
38 changes: 22 additions & 16 deletions python/cudf/cudf/_lib/pylibcudf/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ cdef class Column:
gpumemoryview mask, size_type null_count, size_type offset,
list children
):
self.data_type = data_type
self.size = size
self.data = data
self.mask = mask
self.null_count = null_count
self.offset = offset
self.children = children
self._data_type = data_type
self._size = size
self._data = data
self._mask = mask
self._null_count = null_count
self._offset = offset
self._children = children
self._num_children = len(children)

cdef column_view view(self) nogil:
Expand All @@ -64,16 +64,16 @@ cdef class Column:
cdef const void * data = NULL
cdef const bitmask_type * null_mask = NULL

if self.data is not None:
data = int_to_void_ptr(self.data.ptr)
if self.mask is not None:
null_mask = int_to_bitmask_ptr(self.mask.ptr)
if self._data is not None:
data = int_to_void_ptr(self._data.ptr)
if self._mask is not None:
null_mask = int_to_bitmask_ptr(self._mask.ptr)

# TODO: Check if children can ever change. If not, this could be
# computed once in the constructor and always be reused.
cdef vector[column_view] c_children
with gil:
if self.children is not None:
if self._children is not None:
for child in self.children:
# Need to cast to Column here so that Cython knows that
# `view` returns a typed object, not a Python object. We
Expand All @@ -87,8 +87,8 @@ cdef class Column:
c_children.push_back((<Column> child).view())

return column_view(
self.data_type.c_obj, self.size, data, null_mask,
self.null_count, self.offset, c_children
self._data_type.c_obj, self._size, data, null_mask,
self._null_count, self._offset, c_children
)

@staticmethod
Expand Down Expand Up @@ -137,7 +137,7 @@ cdef class Column:

cpdef DataType type(self):
"""The type of data in the column."""
return self.data_type
return self._data_type

cpdef Column child(self, size_type index) noexcept:
"""Get a child column of this column.
Expand All @@ -152,7 +152,7 @@ cdef class Column:
Column
The child column.
"""
return self.children[index]
return self._children[index]

cpdef size_type num_children(self) noexcept:
"""The number of children of this column."""
Expand All @@ -161,6 +161,12 @@ cdef class Column:
cpdef list_view(self):
    """Return a ListColumnView accessor constructed from this column."""
    return ListColumnView(self)

cpdef gpumemoryview data(self):
    """Return the gpumemoryview stored as this column's data buffer.

    The stored value may be None, so callers should check the result
    before dereferencing it.
    """
    return self._data

cpdef gpumemoryview null_mask(self):
    """Return the gpumemoryview stored as this column's null mask.

    The stored value may be None, so callers should check the result
    before dereferencing it.
    """
    return self._mask


cdef class ListColumnView:
"""Accessor for methods of a Column that are specific to lists."""
Expand Down

0 comments on commit 984a67a

Please sign in to comment.