Skip to content

Commit

Permalink
Add strings.convert.convert_ipv4 APIs to pylibcudf (#16994)
Browse files Browse the repository at this point in the history
Contributes to #15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #16994
  • Loading branch information
mroeschke authored Oct 5, 2024
1 parent c958d8e commit 33b8dfa
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 34 deletions.
42 changes: 12 additions & 30 deletions python/cudf/cudf/_lib/string_casting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@ from pylibcudf.libcudf.strings.convert.convert_integers cimport (
is_hex as cpp_is_hex,
to_integers as cpp_to_integers,
)
from pylibcudf.libcudf.strings.convert.convert_ipv4 cimport (
integers_to_ipv4 as cpp_integers_to_ipv4,
ipv4_to_integers as cpp_ipv4_to_integers,
is_ipv4 as cpp_is_ipv4,
)
from pylibcudf.libcudf.types cimport data_type, type_id

from cudf._lib.types cimport underlying_type_t_type_id
Expand Down Expand Up @@ -569,14 +564,10 @@ def int2ip(Column input_col):
A Column with integer represented in string ipv4 format
"""

cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_integers_to_ipv4(input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_ipv4.integers_to_ipv4(
input_col.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(plc_column)


def ip2int(Column input_col):
Expand All @@ -592,14 +583,10 @@ def ip2int(Column input_col):
A Column with ipv4 represented as integer
"""

cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_ipv4_to_integers(input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_ipv4.ipv4_to_integers(
input_col.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(plc_column)


def is_ipv4(Column source_strings):
Expand All @@ -608,15 +595,10 @@ def is_ipv4(Column source_strings):
that have strings in IPv4 format. This format is nnn.nnn.nnn.nnn
where nnn is integer digits in [0,255].
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

with nogil:
c_result = move(cpp_is_ipv4(
source_view
))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_ipv4.is_ipv4(
source_strings.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(plc_column)


def htoi(Column input_col, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view
cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \
"cudf::strings" nogil:
cdef unique_ptr[column] ipv4_to_integers(
column_view input_col) except +
column_view input) except +

cdef unique_ptr[column] integers_to_ipv4(
column_view input_col) except +
column_view integers) except +

cdef unique_ptr[column] is_ipv4(
column_view source_strings
column_view input
) except +
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# =============================================================================

set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx
convert_fixed_point.pyx
convert_fixed_point.pyx convert_ipv4.pyx
)

set(linked_libraries cudf::cudf)
Expand Down
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ from . cimport (
convert_datetime,
convert_durations,
convert_fixed_point,
convert_ipv4,
)
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/strings/convert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
convert_datetime,
convert_durations,
convert_fixed_point,
convert_ipv4,
)
10 changes: 10 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.column cimport Column


# Convert a strings column of IPv4 addresses into an integer column.
cpdef Column ipv4_to_integers(Column input)

# Convert an integer column into a strings column of IPv4 addresses.
cpdef Column integers_to_ipv4(Column integers)

# Return a boolean column flagging which strings are in IPv4 format.
cpdef Column is_ipv4(Column input)
92 changes: 92 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.column cimport Column
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4


cpdef Column ipv4_to_integers(Column input):
    """
    Convert a column of IPv4 address strings into integers.

    For details, see :cpp:func:`cudf::strings::ipv4_to_integers`

    Parameters
    ----------
    input : Column
        Strings column to convert.

    Returns
    -------
    Column
        New uint32 column converted from strings.
    """
    cdef unique_ptr[column] c_converted

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_converted = move(cpp_convert_ipv4.ipv4_to_integers(input.view()))

    # Hand ownership of the libcudf column over to the pylibcudf wrapper.
    return Column.from_libcudf(move(c_converted))


cpdef Column integers_to_ipv4(Column integers):
    """
    Convert a column of integers into IPv4 address strings.

    For details, see :cpp:func:`cudf::strings::integers_to_ipv4`

    Parameters
    ----------
    integers : Column
        Integer (uint32) column to convert.

    Returns
    -------
    Column
        New strings column.
    """
    cdef unique_ptr[column] c_converted

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_converted = move(cpp_convert_ipv4.integers_to_ipv4(integers.view()))

    # Hand ownership of the libcudf column over to the pylibcudf wrapper.
    return Column.from_libcudf(move(c_converted))


cpdef Column is_ipv4(Column input):
    """
    Identify the strings that can be converted to integers from
    IPv4 format.

    For details, see :cpp:func:`cudf::strings::is_ipv4`

    Parameters
    ----------
    input : Column
        Strings column to check.

    Returns
    -------
    Column
        New column of boolean results for each string.
    """
    cdef unique_ptr[column] c_flags

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_flags = move(cpp_convert_ipv4.is_ipv4(input.view()))

    # Hand ownership of the libcudf column over to the pylibcudf wrapper.
    return Column.from_libcudf(move(c_flags))
31 changes: 31 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
import pyarrow as pa
import pylibcudf as plc
from utils import assert_column_eq


def test_ipv4_to_integers():
    """ipv4_to_integers packs a valid dotted-quad string into a uint32.

    The input must be a well-formed IPv4 address (every octet in
    [0, 255]): libcudf documents the result for malformed strings as
    undefined, so asserting on such an input would pin undefined
    behaviour.
    """
    arr = pa.array(["123.45.67.89", None])
    result = plc.strings.convert.convert_ipv4.ipv4_to_integers(
        plc.interop.from_arrow(arr)
    )
    # (123 << 24) | (45 << 16) | (67 << 8) | 89 == 2066563929
    expected = pa.array([2066563929, None], type=pa.uint32())
    assert_column_eq(result, expected)


def test_integers_to_ipv4():
    """integers_to_ipv4 output matches the expected IPv4 strings."""
    expected = pa.array(["0.0.0.1", "0.0.0.0", None])
    integers = plc.interop.from_arrow(
        pa.array([1, 0, None], type=pa.uint32())
    )
    result = plc.strings.convert.convert_ipv4.integers_to_ipv4(integers)
    assert_column_eq(result, expected)


def test_is_ipv4():
    """is_ipv4 output matches the expected boolean column."""
    expected = pa.array([True, False, False, None])
    candidates = plc.interop.from_arrow(
        pa.array(["0.0.0.1", "1.2.34", "A", None])
    )
    result = plc.strings.convert.convert_ipv4.is_ipv4(candidates)
    assert_column_eq(result, expected)

0 comments on commit 33b8dfa

Please sign in to comment.