Skip to content

Commit

Permalink
Clean up miscellaneous, unneeded pylibcudf.libcudf usage in cudf._lib (rapidsai#17309)
Browse files Browse the repository at this point in the history
* Remove `ctypedef const scalar constscalar` usage
* Use `dtype_to_pylibcudf_type` where appropriate
* Use pylibcudf enums instead of `pylibcudf.libcudf` types

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Matthew Murray (https://github.com/Matt711)

URL: rapidsai#17309
  • Loading branch information
mroeschke authored Nov 13, 2024
1 parent 13c7115 commit 353d2de
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 44 deletions.
4 changes: 0 additions & 4 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,10 @@ from libcpp.memory cimport make_unique
cimport pylibcudf.libcudf.contiguous_split as cpp_contiguous_split
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type

from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_table_view

# workaround for https://github.com/cython/cython/issues/3885
ctypedef const scalar constscalar


def _gather_map_is_valid(
gather_map: "cudf.core.column.ColumnBase",
Expand Down
4 changes: 0 additions & 4 deletions python/cudf/cudf/_lib/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf._lib.scalar import as_device_scalar

from pylibcudf.libcudf.scalar.scalar cimport scalar

import pylibcudf

from cudf._lib.aggregation import make_aggregation
Expand Down Expand Up @@ -53,8 +51,6 @@ _DECIMAL_AGGS = {
"NUNIQUE",
"SUM",
}
# workaround for https://github.com/cython/cython/issues/3885
ctypedef const scalar constscalar


@singledispatch
Expand Down
32 changes: 7 additions & 25 deletions python/cudf/cudf/_lib/json.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

import io
import os
from collections import abc

Expand All @@ -9,12 +8,9 @@ from cudf.core.buffer import acquire_spill_lock

from libcpp cimport bool

from pylibcudf.libcudf.types cimport data_type, type_id
from pylibcudf.types cimport DataType

from cudf._lib.column cimport Column
from cudf._lib.io.utils cimport add_df_col_struct_names
from cudf._lib.types cimport dtype_to_data_type
from cudf._lib.types cimport dtype_to_pylibcudf_type
from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io

import pylibcudf as plc
Expand Down Expand Up @@ -42,13 +38,9 @@ cpdef read_json(object filepaths_or_buffers,
# the encoded memoryview externally to ensure the encoded buffer
# isn't destroyed before calling libcudf `read_json()`

for idx in range(len(filepaths_or_buffers)):
if isinstance(filepaths_or_buffers[idx], io.StringIO):
filepaths_or_buffers[idx] = \
filepaths_or_buffers[idx].read().encode()
elif isinstance(filepaths_or_buffers[idx], str) and \
not os.path.isfile(filepaths_or_buffers[idx]):
filepaths_or_buffers[idx] = filepaths_or_buffers[idx].encode()
for idx, source in enumerate(filepaths_or_buffers):
if isinstance(source, str) and not os.path.isfile(source):
filepaths_or_buffers[idx] = source.encode()

# Setup arguments
if compression is not None:
Expand Down Expand Up @@ -181,15 +173,15 @@ def write_json(
)


cdef _get_cudf_schema_element_from_dtype(object dtype) except *:
def _get_cudf_schema_element_from_dtype(object dtype):
dtype = cudf.dtype(dtype)
if isinstance(dtype, cudf.CategoricalDtype):
raise NotImplementedError(
"CategoricalDtype as dtype is not yet "
"supported in JSON reader"
)

lib_type = DataType.from_libcudf(dtype_to_data_type(dtype))
lib_type = dtype_to_pylibcudf_type(dtype)
child_types = []

if isinstance(dtype, cudf.StructDtype):
Expand All @@ -202,23 +194,13 @@ cdef _get_cudf_schema_element_from_dtype(object dtype) except *:
_get_cudf_schema_element_from_dtype(dtype.element_type)

child_types = [
("offsets", DataType.from_libcudf(data_type(type_id.INT32)), []),
("offsets", plc.DataType(plc.TypeId.INT32), []),
("element", child_lib_type, grandchild_types)
]

return lib_type, child_types


cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *:
dtype = cudf.dtype(dtype)
if isinstance(dtype, cudf.CategoricalDtype):
raise NotImplementedError(
"CategoricalDtype as dtype is not yet "
"supported in JSON reader"
)
return dtype_to_data_type(dtype)


def _dtype_to_names_list(col):
if isinstance(col.dtype, cudf.StructDtype):
return [(name, _dtype_to_names_list(child))
Expand Down
30 changes: 19 additions & 11 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,13 @@ from cudf.core.buffer import acquire_spill_lock

from libcpp cimport bool

from pylibcudf.libcudf.types cimport (
nan_equality, null_equality, null_order, order, size_type
)
from pylibcudf.libcudf.types cimport size_type

from cudf._lib.column cimport Column
from cudf._lib.utils cimport columns_from_pylibcudf_table

import pylibcudf as plc

from pylibcudf cimport Scalar


@acquire_spill_lock()
def count_elements(Column col):
Expand All @@ -39,8 +35,16 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal):
return Column.from_pylibcudf(
plc.lists.distinct(
col.to_pylibcudf(mode="read"),
null_equality.EQUAL if nulls_equal else null_equality.UNEQUAL,
nan_equality.ALL_EQUAL if nans_all_equal else nan_equality.UNEQUAL,
(
plc.types.NullEquality.EQUAL
if nulls_equal
else plc.types.NullEquality.UNEQUAL
),
(
plc.types.NanEquality.ALL_EQUAL
if nans_all_equal
else plc.types.NanEquality.UNEQUAL
),
)
)

Expand All @@ -50,8 +54,12 @@ def sort_lists(Column col, bool ascending, str na_position):
return Column.from_pylibcudf(
plc.lists.sort_lists(
col.to_pylibcudf(mode="read"),
order.ASCENDING if ascending else order.DESCENDING,
null_order.BEFORE if na_position == "first" else null_order.AFTER,
plc.types.Order.ASCENDING if ascending else plc.types.Order.DESCENDING,
(
plc.types.NullOrder.BEFORE
if na_position == "first"
else plc.types.NullOrder.AFTER
),
False,
)
)
Expand Down Expand Up @@ -82,7 +90,7 @@ def contains_scalar(Column col, py_search_key):
return Column.from_pylibcudf(
plc.lists.contains(
col.to_pylibcudf(mode="read"),
<Scalar> py_search_key.device_value.c_value,
py_search_key.device_value.c_value,
)
)

Expand All @@ -92,7 +100,7 @@ def index_of_scalar(Column col, object py_search_key):
return Column.from_pylibcudf(
plc.lists.index_of(
col.to_pylibcudf(mode="read"),
<Scalar> py_search_key.device_value.c_value,
py_search_key.device_value.c_value,
plc.lists.DuplicateFindOption.FIND_FIRST,
)
)
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/io/types.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ cdef class SourceInfo:
raise FileNotFoundError(
errno.ENOENT, os.strerror(errno.ENOENT), src
)
# TODO: Keep the sources alive (self.byte_sources = sources)
# for str data (e.g. read_json)?
c_files.push_back(<string> str(src).encode())

self.c_obj = move(source_info(c_files))
Expand Down

0 comments on commit 353d2de

Please sign in to comment.