rapidsai · rapids-bot · Jun 30, 2021 · Apr 20, 2021 · Apr 26, 2021 · Apr 27, 2021
@@ -11,7 +11,6 @@
     Optional,
     Sequence,
     Tuple,
-    Union,
     cast,
 )
 
@@ -26,7 +25,7 @@
 from cudf._typing import ColumnLike, Dtype, ScalarLike
 from cudf.core.buffer import Buffer
 from cudf.core.column import column
-from cudf.core.column.methods import ColumnMethodsMixin
+from cudf.core.column.methods import ColumnMethodsMixin, ParentType
 from cudf.core.dtypes import CategoricalDtype
 from cudf.utils.dtypes import (
     is_categorical_dtype,
@@ -45,9 +44,6 @@
     )
 
 
-ParentType = Union["cudf.Series", "cudf.Index"]
-
-
 class CategoricalAccessor(ColumnMethodsMixin):
     _column: CategoricalColumn
 

@@ -14,13 +14,17 @@
     extract_element,
     sort_lists,
 )
+from cudf._typing import ColumnLike, ScalarLike
 from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, as_column, column
-from cudf.core.column.methods import ColumnMethodsMixin
+from cudf.core.column.methods import ColumnMethodsMixin, ParentType
+from cudf.core.dtypes import ListDtype
 from cudf.utils.dtypes import is_list_dtype, is_numerical_dtype
 
 
 class ListColumn(ColumnBase):
+    dtype: ListDtype
+
     def __init__(
         self, size, dtype, mask=None, offset=0, null_count=None, children=(),
     ):
@@ -177,14 +181,16 @@ class ListMethods(ColumnMethodsMixin):
     List methods for Series
     """
 
-    def __init__(self, column, parent=None):
+    _column: ListColumn
+
+    def __init__(self, column: ListColumn, parent: ParentType = None):
         if not is_list_dtype(column.dtype):
             raise AttributeError(
                 "Can only use .list accessor with a 'list' dtype"
             )
         super().__init__(column=column, parent=parent)
 
-    def get(self, index):
+    def get(self, index: int) -> ParentType:
         """
         Extract element at the given index from each component
 
@@ -216,10 +222,10 @@ def get(self, index):
         else:
             raise IndexError("list index out of range")
 
-    def contains(self, search_key):
+    def contains(self, search_key: ScalarLike) -> ParentType:
         """
-        Creates a column of bool values indicating whether the specified scalar
-        is an element of each row of a list column.
+        Returns boolean values indicating whether the specified scalar
+        is an element of each row.
 
         Parameters
         ----------
@@ -228,7 +234,7 @@ def contains(self, search_key):
 
         Returns
         -------
-        Column
+        Series or Index
 
         Examples
         --------
@@ -256,14 +262,14 @@ def contains(self, search_key):
             return res
 
     @property
-    def leaves(self):
+    def leaves(self) -> ParentType:
         """
         From a Series of (possibly nested) lists, obtain the elements from
         the innermost lists as a flat Series (one value per row).
 
         Returns
         -------
-        Series
+        Series or Index
 
         Examples
         --------
@@ -284,7 +290,7 @@ def leaves(self):
                 self._column.elements, retain_index=False
             )
 
-    def len(self):
+    def len(self) -> ParentType:
         """
         Computes the length of each element in the Series/Index.
 
@@ -308,18 +314,18 @@ def len(self):
         """
         return self._return_or_inplace(count_elements(self._column))
 
-    def take(self, lists_indices):
+    def take(self, lists_indices: ColumnLike) -> ParentType:
         """
         Collect list elements based on given indices.
 
         Parameters
         ----------
-        lists_indices: List type arrays
+        lists_indices: Series-like of lists
             Specifies what to collect from each row
 
         Returns
         -------
-        ListColumn
+        Series or Index
 
         Examples
         --------
@@ -363,14 +369,14 @@ def take(self, lists_indices):
         else:
             return res
 
-    def unique(self):
+    def unique(self) -> ParentType:
         """
-        Returns unique element for each list in the column, order for each
-        unique element is not guaranteed.
+        Returns the unique elements in each list.
+        The ordering of elements is not guaranteed.
 
         Returns
         -------
-        ListColumn
+        Series or Index
 
         Examples
         --------
@@ -400,12 +406,12 @@ def unique(self):
 
     def sort_values(
         self,
-        ascending=True,
-        inplace=False,
-        kind="quicksort",
-        na_position="last",
-        ignore_index=False,
-    ):
+        ascending: bool = True,
+        inplace: bool = False,
+        kind: str = "quicksort",
+        na_position: str = "last",
+        ignore_index: bool = False,
+    ) -> ParentType:
         """
         Sort each list by the values.
 
@@ -422,7 +428,7 @@ def sort_values(
 
         Returns
         -------
-        ListColumn with each list sorted
+        Series or Index with each list sorted
 
         Notes
         -----
@@ -451,3 +457,56 @@ def sort_values(
             sort_lists(self._column, ascending, na_position),
             retain_index=not ignore_index,
         )
+
+    def ravel(self) -> ParentType:
+        """
+        Removes one level of nesting from each row of the list Series.
+
+        Returns
+        -------
+        Series or Index with one level of nesting removed from each row.
+
+        Examples
+        --------
+        >>> s1
+        0      [[1.0, 2.0], [3.0, 4.0, 5.0]]
+        1    [[6.0, nan], [7.0], [8.0, 9.0]]
+        dtype: list
+        >>> s1.list.ravel()
+        0    [1.0, 2.0, 3.0, 4.0, 5.0]
+        1    [6.0, nan, 7.0, 8.0, 9.0]
+        dtype: list
+
+        Null values at the top-level in each row are dropped:
+
+        >>> s2
+        0    [[1.0, 2.0], None, [3.0, 4.0, 5.0]]
+        1        [[6.0, nan], [7.0], [8.0, 9.0]]
+        dtype: list
+        >>> s2.list.ravel()
+        0    [1.0, 2.0, 3.0, 4.0, 5.0]
+        1    [6.0, nan, 7.0, 8.0, 9.0]
+        dtype: list
+        """
+        result_dtype = self._column.dtype.element_type
+        if not isinstance(result_dtype, ListDtype):
+            return self._return_or_inplace(self._column)
+
+        self_offsets = self._column.children[0]
+        child_offsets = self._column.children[1].children[0]
+        result_offsets = child_offsets[self_offsets]
+        result_children = (
+            result_offsets,
+            self._column.children[1].children[1],
+        )
+
+        return self._return_or_inplace(
+            ListColumn(
+                self._column.size,
+                self._column.dtype.element_type,
+                self._column.mask,
+                self._column.offset,
+                self._column.null_count,
+                result_children,
+            )
+        )
@@ -11,6 +11,8 @@
 if TYPE_CHECKING:
     from cudf.core.column import ColumnBase
 
+ParentType = Union["cudf.Series", "cudf.Index"]
+
 
 class ColumnMethodsMixin:
     _column: ColumnBase

@@ -154,7 +154,7 @@
 from cudf._typing import ColumnLike, Dtype, ScalarLike
 from cudf.core.buffer import Buffer
 from cudf.core.column import column, datetime
-from cudf.core.column.methods import ColumnMethodsMixin
+from cudf.core.column.methods import ColumnMethodsMixin, ParentType
 from cudf.utils import utils
 from cudf.utils.docutils import copy_docstring
 from cudf.utils.dtypes import (
@@ -209,9 +209,6 @@
 }
 
 
-ParentType = Union["cudf.Series", "cudf.Index"]
-
-
 class StringMethods(ColumnMethodsMixin):
     def __init__(self, column, parent=None):
         """

@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 import functools
+import itertools
 
 import numpy as np
 import pandas as pd
@@ -315,3 +316,31 @@ def test_contains_null_search_key(data, expect):
     expect = cudf.Series(expect, dtype="bool")
     got = sr.list.contains(cudf.Scalar(cudf.NA, sr.dtype.element_type))
     assert_eq(expect, got)
+
+
+@pytest.mark.parametrize(
+    "row",
+    [
+        [[]],
+        [[1]],
+        [[1, 2]],
+        [[1, 2], [3, 4, 5]],
+        [[1, 2, None], [3, 4, 5]],
+        [[[1, 2], [3, 4]], [[5, 6, 7], [8, 9]]],
+        [[["a", "c", "de", None], None, ["fg"]], [["abc", "de"], None]],
+    ],
+)
+def test_ravel(row):
+    def ravel(row):
+        return list(
+            itertools.chain.from_iterable([x for x in row if x is not None])
+        )
+
+    expect = pd.Series([ravel(row)])
+    got = cudf.Series([row]).list.ravel()
+    assert_eq(expect, got)
+
+
+def test_ravel_no_nesting():
+    s = cudf.Series([[1, 2], [3, 4, 5]])
+    assert_eq(s, s.list.ravel())