Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add json APIs to pylibcudf #17025

Merged
merged 5 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ This page provides API documentation for pylibcudf.
groupby
interop
join
json
labeling
lists
merge
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/json.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
====
json
====

.. automodule:: pylibcudf.json
:members:
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/strings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
)
from cudf._lib.strings.find_multiple import find_multiple
from cudf._lib.strings.findall import find_re, findall
from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object
from cudf._lib.strings.json import get_json_object
from cudf._lib.strings.padding import center, ljust, pad, rjust, zfill
from cudf._lib.strings.repeat import repeat_scalar, repeat_sequence
from cudf._lib.strings.replace import (
Expand Down
80 changes: 11 additions & 69 deletions python/cudf/cudf/_lib/strings/json.pyx
Original file line number Diff line number Diff line change
@@ -1,84 +1,26 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
import pylibcudf as plc
from pylibcudf.json cimport GetJsonObjectOptions

from cudf.core.buffer import acquire_spill_lock

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.scalar.scalar cimport string_scalar
from pylibcudf.libcudf.strings.json cimport (
get_json_object as cpp_get_json_object,
get_json_object_options,
)

from cudf._lib.column cimport Column
from cudf._lib.scalar cimport DeviceScalar


@acquire_spill_lock()
def get_json_object(
Column col, object py_json_path, GetJsonObjectOptions options):
Column col,
object py_json_path,
GetJsonObjectOptions options
):
"""
Apply a JSONPath string to all rows in an input column
of json strings.
"""
cdef unique_ptr[column] c_result

cdef column_view col_view = col.view()
cdef DeviceScalar json_path = py_json_path.device_value

cdef const string_scalar* scalar_json_path = <const string_scalar*>(
json_path.get_raw_ptr()
plc_column = plc.json.get_json_object(
col.to_pylibcudf(mode="read"),
py_json_path.device_value.c_value,
options
)

with nogil:
c_result = move(cpp_get_json_object(
col_view,
scalar_json_path[0],
options.options,
))

return Column.from_unique_ptr(move(c_result))


cdef class GetJsonObjectOptions:
cdef get_json_object_options options

def __init__(
self,
*,
allow_single_quotes=False,
strip_quotes_from_single_strings=True,
missing_fields_as_nulls=False
):
self.options.set_allow_single_quotes(allow_single_quotes)
self.options.set_strip_quotes_from_single_strings(
strip_quotes_from_single_strings
)
self.options.set_missing_fields_as_nulls(missing_fields_as_nulls)

@property
def allow_single_quotes(self):
return self.options.get_allow_single_quotes()

@property
def strip_quotes_from_single_strings(self):
return self.options.get_strip_quotes_from_single_strings()

@property
def missing_fields_as_nulls(self):
return self.options.get_missing_fields_as_nulls()

@allow_single_quotes.setter
def allow_single_quotes(self, val):
self.options.set_allow_single_quotes(val)

@strip_quotes_from_single_strings.setter
def strip_quotes_from_single_strings(self, val):
self.options.set_strip_quotes_from_single_strings(val)

@missing_fields_as_nulls.setter
def missing_fields_as_nulls(self, val):
self.options.set_missing_fields_as_nulls(val)
return Column.from_pylibcudf(plc_column)
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -2385,8 +2385,7 @@ def get_json_object(
0 [\n { "category": "reference",\n ...
dtype: object
"""

options = libstrings.GetJsonObjectOptions(
options = plc.json.GetJsonObjectOptions(
allow_single_quotes=allow_single_quotes,
strip_quotes_from_single_strings=(
strip_quotes_from_single_strings
Expand Down
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(cython_sources
groupby.pyx
interop.pyx
join.pyx
json.pyx
labeling.pyx
lists.pyx
merge.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
filling,
groupby,
join,
json,
labeling,
lists,
merge,
Expand Down Expand Up @@ -60,6 +61,7 @@ __all__ = [
"gpumemoryview",
"groupby",
"join",
"json",
"lists",
"merge",
"null_mask",
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
interop,
io,
join,
json,
labeling,
lists,
merge,
Expand Down Expand Up @@ -73,6 +74,7 @@
"interop",
"io",
"join",
"json",
"labeling",
"lists",
"merge",
Expand Down
16 changes: 16 additions & 0 deletions python/pylibcudf/pylibcudf/json.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.column cimport Column
from pylibcudf.libcudf.json cimport get_json_object_options
from pylibcudf.scalar cimport Scalar


# Cython-level declaration of the options holder accepted by ``get_json_object``.
cdef class GetJsonObjectOptions:
# C-level attribute holding the ``get_json_object_options`` value cimported above.
cdef get_json_object_options options


# Declaration only. ``options=*`` marks ``options`` as an optional argument whose
# default value is given by the implementation rather than by this declaration.
cpdef Column get_json_object(
Column col,
Scalar json_path,
GetJsonObjectOptions options=*
)
154 changes: 154 additions & 0 deletions python/pylibcudf/pylibcudf/json.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.column cimport Column
from pylibcudf.libcudf cimport json as cpp_json
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.scalar.scalar cimport string_scalar
from pylibcudf.scalar cimport Scalar


cdef class GetJsonObjectOptions:
    """Settings for ``get_json_object()``

    Parameters
    ----------
    allow_single_quotes : bool, default False
        Whether single-quotes for representing strings are allowed.
    strip_quotes_from_single_strings : bool, default True
        Whether individually returned string values have their quotes stripped.
    missing_fields_as_nulls : bool, default False
        Whether a field not contained by an object is interpreted as null.
    """
    def __init__(
        self,
        *,
        allow_single_quotes=False,
        strip_quotes_from_single_strings=True,
        missing_fields_as_nulls=False
    ):
        # Every value goes through its public setter, so construction and later
        # updates share the same ``bool``-typed entry point.
        self.set_allow_single_quotes(allow_single_quotes)
        self.set_strip_quotes_from_single_strings(
            strip_quotes_from_single_strings
        )
        self.set_missing_fields_as_nulls(missing_fields_as_nulls)

    # -- allow_single_quotes ---------------------------------------------------

    def get_allow_single_quotes(self):
        """
        Report whether single-quotes for representing strings are allowed.

        Returns
        -------
        bool
            true if single-quotes are allowed, false otherwise.
        """
        return self.options.get_allow_single_quotes()

    def set_allow_single_quotes(self, bool val):
        """
        Choose whether single-quotes for strings are allowed.

        Parameters
        ----------
        val : bool
            Whether to allow single quotes

        Returns
        -------
        None
        """
        self.options.set_allow_single_quotes(val)

    # -- strip_quotes_from_single_strings --------------------------------------

    def get_strip_quotes_from_single_strings(self):
        """
        Report whether individually returned string values have their quotes
        stripped.

        Returns
        -------
        bool
            true if individually returned string values have their quotes stripped.
        """
        return self.options.get_strip_quotes_from_single_strings()

    def set_strip_quotes_from_single_strings(self, bool val):
        """
        Choose whether individually returned string values have their quotes
        stripped.

        Parameters
        ----------
        val : bool
            Whether to strip quotes from single strings.

        Returns
        -------
        None
        """
        self.options.set_strip_quotes_from_single_strings(val)

    # -- missing_fields_as_nulls -----------------------------------------------

    def get_missing_fields_as_nulls(self):
        """
        Report whether a field not contained by an object is interpreted as null.

        Returns
        -------
        bool
            true if missing fields are interpreted as null.
        """
        return self.options.get_missing_fields_as_nulls()

    def set_missing_fields_as_nulls(self, bool val):
        """
        Choose whether missing fields are interpreted as null.

        Parameters
        ----------
        val : bool
            Whether to treat missing fields as nulls.

        Returns
        -------
        None
        """
        self.options.set_missing_fields_as_nulls(val)


cpdef Column get_json_object(
    Column col,
    Scalar json_path,
    GetJsonObjectOptions options=None
):
    """
    Evaluate a JSONPath string against every row of an input strings column.

    For details, see :cpp:func:`cudf::get_json_object`

    Parameters
    ----------
    col : Column
        The input strings column. Each row must contain a valid json string.

    json_path : Scalar
        The JSONPath string to be applied to each row.

    options : GetJsonObjectOptions, optional
        Options for controlling the behavior of the function. When ``None``
        (the default), a default-constructed ``GetJsonObjectOptions`` is used.

    Returns
    -------
    Column
        New strings column containing the retrieved json object strings.
    """
    cdef unique_ptr[column] c_output
    # Reinterpret the scalar's underlying libcudf object as a string scalar.
    cdef string_scalar* c_path = <string_scalar*>(json_path.c_obj.get())

    if options is None:
        options = GetJsonObjectOptions()

    # Copy the C++ options into a plain C variable so the nogil section below
    # does not have to touch the Python-level ``options`` object.
    cdef cpp_json.get_json_object_options c_opts = options.options

    with nogil:
        c_output = move(
            cpp_json.get_json_object(col.view(), dereference(c_path), c_opts)
        )

    return Column.from_libcudf(move(c_output))
4 changes: 4 additions & 0 deletions python/pylibcudf/pylibcudf/strings/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ from . cimport (
padding,
regex_flags,
regex_program,
repeat,
replace,
side_type,
slice,
Expand All @@ -33,9 +34,12 @@ __all__ = [
"convert",
"extract",
"find",
"find_multiple",
"findall",
"padding",
"regex_flags",
"regex_program",
"repeat",
"replace",
"slice",
"strip",
Expand Down
3 changes: 3 additions & 0 deletions python/pylibcudf/pylibcudf/strings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@
"convert",
"extract",
"find",
"find_multiple",
"findall",
"padding",
"regex_flags",
"regex_program",
"repeat",
"replace",
"slice",
"strip",
Expand Down
Loading
Loading