Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python API for ListMethods.len() #7283

Merged
merged 6 commits into from
Feb 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions python/cudf/cudf/_lib/cpp/lists/count_elements.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr

from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view

# Cython-side declaration of the C++ function ``cudf::lists::count_elements``
# from the libcudf header below. The whole block is declared ``nogil`` so the
# function can be invoked from a ``with nogil:`` section.
cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil:
    # Accepts a view over a lists column and hands back ownership of a column
    # through a unique_ptr. ``except +`` asks Cython to convert any C++
    # exception raised by the call into a Python exception.
    unique_ptr[column] count_elements(
        const lists_column_view
    ) except +
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/cpp/lists/lists_column_view.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ from cudf._lib.cpp.column.column_view cimport (

cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil:
cdef cppclass lists_column_view(column_view):
lists_column_view() except +
lists_column_view(const column_view& lists_column) except +
column_view parent() except +
column_view offsets() except +
Expand Down
34 changes: 34 additions & 0 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr, shared_ptr, make_shared
isVoid marked this conversation as resolved.
Show resolved Hide resolved
from libcpp.utility cimport move

from cudf._lib.cpp.lists.count_elements cimport (
count_elements as cpp_count_elements
)
from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.column.column cimport column

from cudf._lib.column cimport Column


from cudf.core.dtypes import ListDtype


def count_elements(Column col):
    """
    Count the elements in each row of a list column.

    Parameters
    ----------
    col : Column
        Column whose dtype must be a ``ListDtype``.

    Returns
    -------
    Column
        Column wrapping the result of libcudf's
        ``cudf::lists::count_elements``.

    Raises
    ------
    TypeError
        If ``col.dtype`` is not a ``ListDtype``.
    """
    if not isinstance(col.dtype, ListDtype):
        raise TypeError("col is not a list column.")

    # lists_column_view has no default constructor, so it cannot be declared
    # as a plain C++ local; it is constructed in place behind a shared_ptr.
    cdef shared_ptr[lists_column_view] c_lists = (
        make_shared[lists_column_view](col.view())
    )
    cdef unique_ptr[column] c_counts

    # The libcudf call does not touch Python objects, so the GIL is released.
    with nogil:
        c_counts = move(cpp_count_elements(c_lists.get()[0]))

    # Transfer ownership of the C++ column to a Python-level Column.
    return Column.from_unique_ptr(move(c_counts))
25 changes: 25 additions & 0 deletions python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pyarrow as pa

import cudf
from cudf._lib.lists import count_elements
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase, column
from cudf.core.column.methods import ColumnMethodsMixin
Expand Down Expand Up @@ -203,3 +204,27 @@ def leaves(self):
return self._return_or_inplace(
self._column.elements, retain_index=False
)

def len(self):
    """
    Return the number of elements in each list of the Series/Index.

    Returns
    -------
    Series or Index

    Examples
    --------
    >>> s = cudf.Series([[1, 2, 3], None, [4, 5]])
    >>> s
    0    [1, 2, 3]
    1         None
    2       [4, 5]
    dtype: list
    >>> s.list.len()
    0       3
    1    <NA>
    2       2
    dtype: int32
    """
    # Per-row element counts are computed on the underlying list column,
    # then wrapped back into the caller's Series/Index type.
    lengths = count_elements(self._column)
    return self._return_or_inplace(lengths)
23 changes: 23 additions & 0 deletions python/cudf/cudf/tests/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,26 @@ def test_listdtype_hash():
c = cudf.core.dtypes.ListDtype("int32")

assert hash(a) != hash(c)


@pytest.mark.parametrize(
    "data",
    [
        [[]],
        [[1, 2, 3], [4, 5]],
        [[1, 2, 3], [], [4, 5]],
        [[1, 2, 3], None, [4, 5]],
        [[None, None], [None]],
        [[[[[[1, 2, 3]]]]]],
        cudf.Series([[1, 2]]).iloc[0:0],
        cudf.Series([None, [1, 2]]).iloc[0:1],
    ],
)
def test_len(data):
    """Check ``Series.list.len()`` against per-row ``len`` on pandas data."""

    def _length_or_none(item):
        # Null rows have no length; they are expected to stay null.
        if item is None:
            return None
        return len(item)

    gsr = cudf.Series(data)

    expected = gsr.to_pandas().map(_length_or_none)
    actual = gsr.list.len()

    # Only values are compared; the result dtypes are allowed to differ.
    assert_eq(expected, actual, check_dtype=False)