rapidsai · rapids-bot · Mar 24, 2021 · Mar 19, 2021 · Mar 19, 2021 · Mar 19, 2021
diff --git a/docs/cudf/source/api.rst b/docs/cudf/source/api.rst
@@ -20,6 +20,13 @@ Series
     :inherited-members:
     :exclude-members: serialize, deserialize, logical_not, logical_or, logical_and, remainder, sum_of_squares, fill, merge, iteritems, items, device_deserialize, device_serialize, host_deserialize, host_serialize, to_dict, tolist, to_list
 
+Lists
+-----
+.. currentmodule:: cudf.core.column.lists
+
+.. autoclass:: ListMethods
+    :members:
+
 Strings
 -------
 .. currentmodule:: cudf.core.column.string
@@ -253,4 +260,4 @@ GpuArrowReader
 .. currentmodule:: cudf.comm.gpuarrow
 .. autoclass:: GpuArrowReader
     :members:
-    :exclude-members: count, index
+    :exclude-members: count, index
@@ -0,0 +1,15 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+
+from cudf._lib.cpp.types cimport order, null_order
+from cudf._lib.cpp.column.column cimport column
+from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
+
+
+cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil:  # C++ declarations, callable without the GIL
+    cdef unique_ptr[column] sort_lists(  # result is an owned column
+        const lists_column_view source_column,  # view of the list column passed in
+        order column_order,  # C++ `order` enum value
+        null_order null_precedence  # C++ `null_order` enum value
+    ) except +  # C++ exceptions are translated into Python exceptions
@@ -10,23 +10,28 @@ from cudf._lib.cpp.lists.count_elements cimport (
 from cudf._lib.cpp.lists.explode cimport (
     explode_outer as cpp_explode_outer
 )
+from cudf._lib.cpp.lists.sorting cimport (
+    sort_lists as cpp_sort_lists
+)
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.column.column cimport column
 
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.cpp.types cimport size_type
+from cudf._lib.cpp.types cimport size_type, order, null_order
 
 from cudf._lib.column cimport Column
 from cudf._lib.table cimport Table
 
+from cudf._lib.types cimport (
+    underlying_type_t_null_order, underlying_type_t_order
+)
+from cudf._lib.types import Order, NullOrder
 from cudf.core.dtypes import ListDtype
 
 
 def count_elements(Column col):
-    if not isinstance(col.dtype, ListDtype):
-        raise TypeError("col is not a list column.")
 
     # shared_ptr required because lists_column_view has no default
     # ctor
@@ -58,3 +63,22 @@ def explode_outer(Table tbl, int explode_column_idx, bool ignore_index=False):
         column_names=tbl._column_names,
         index_names=None if ignore_index else tbl._index_names
     )
+
+
+def sort_lists(Column col, object order_enum, object null_order_enum):  # Python-callable wrapper around cpp_sort_lists
+    cdef shared_ptr[lists_column_view] list_view = (  # held via shared_ptr: lists_column_view has no default ctor
+        make_shared[lists_column_view](col.view())
+    )
+    cdef order c_sort_order = <order><underlying_type_t_order>order_enum.value  # enum member's value cast to C++ order
+    cdef null_order c_null_prec = (  # enum member's value cast to C++ null_order
+        <null_order><underlying_type_t_null_order>null_order_enum.value
+    )
+
+    cdef unique_ptr[column] c_result
+
+    with nogil:  # the GIL is released for the duration of the C++ call
+        c_result = move(
+            cpp_sort_lists(list_view.get()[0], c_sort_order, c_null_prec)
+        )
+
+    return Column.from_unique_ptr(move(c_result))  # ownership of the result moves into a Column
@@ -7,7 +7,8 @@
 
 import cudf
 from cudf._lib.copying import segmented_gather
-from cudf._lib.lists import count_elements
+from cudf._lib.lists import count_elements, sort_lists
+from cudf._lib.types import NullOrder, Order
 from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, as_column, column
 from cudf.core.column.methods import ColumnMethodsMixin
@@ -285,3 +286,62 @@ def take(self, lists_indices):
             raise
         else:
             return res
+
+    def sort_values(
+        self,
+        ascending=True,
+        inplace=False,  # True is rejected below
+        kind="quicksort",  # any other value is rejected below
+        na_position="last",
+        ignore_index=False,
+    ):
+        """
+        Sort each list by the values.
+
+        Sort the lists in ascending or descending order by some criterion.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            If True, sort values in ascending order, otherwise descending.
+        na_position : {'first', 'last'}, default 'last'
+            'first' puts nulls at the beginning, 'last' puts nulls at the end.
+        ignore_index : bool, default False
+            If True, the resulting axis will be labeled 0, 1, ..., n - 1.
+
+        Returns
+        -------
+        ListColumn with each list sorted
+
+        Notes
+        -----
+        Difference from pandas:
+          * Not supporting: `inplace`, `kind`
+
+        Examples
+        --------
+        >>> s = cudf.Series([[4, 2, None, 9], [8, 8, 2], [2, 1]])
+        >>> s.list.sort_values(ascending=True, na_position="last")
+        0    [2.0, 4.0, 9.0, nan]
+        1         [2.0, 8.0, 8.0]
+        2              [1.0, 2.0]
+        dtype: list
+        """
+        if inplace:  # accepted for pandas signature parity only
+            raise NotImplementedError("`inplace` not currently implemented.")
+        if kind != "quicksort":  # accepted for pandas signature parity only
+            raise NotImplementedError("`kind` not currently implemented.")
+        if na_position not in {"first", "last"}:
+            raise ValueError(f"Unknown `na_position` value {na_position}")
+        if is_list_dtype(self._column.children[1].dtype):  # children[1] presumably holds the elements - TODO confirm
+            raise NotImplementedError("Nested lists sort is not supported.")  # NOTE(review): is_list_dtype import not visible in this diff
+
+        sort_order = Order.ASCENDING if ascending else Order.DESCENDING  # bool -> Order enum
+        null_order = (  # na_position -> NullOrder enum
+            NullOrder.BEFORE if na_position == "first" else NullOrder.AFTER
+        )  # NOTE(review): confirm nulls still land "first"/"last" when ascending=False
+
+        return self._return_or_inplace(  # helper defined outside this diff
+            sort_lists(self._column, sort_order, null_order),
+            retain_index=not ignore_index,  # presumably keeps the parent's index unless ignore_index is set
+        )
@@ -3571,6 +3571,7 @@ def sort_values(
         4    3
         3    4
         1    5
+        dtype: int64
         """
 
         if inplace:

@@ -1,4 +1,5 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
+import functools
 
 import pandas as pd
 import pyarrow as pa
@@ -159,3 +160,54 @@ def test_take_invalid(invalid, exception):
     gs = cudf.Series([[0, 1], [2, 3]])
     with exception:
         gs.list.take(invalid)
+
+
+def key_func_builder(x, na_position):  # sort key for the expected result: gives None an orderable stand-in
+    if x is None:
+        if na_position == "first":
+            return -1e8  # sentinel below every test value
+        else:
+            return 1e8  # sentinel above every test value
+    else:
+        return x  # non-null values are their own key
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [[4, 2, None, 9], [8, 8, 2], [2, 1]],  # a null element inside a list
+        [[4, 2, None, 9], [8, 8, 2], None],  # plus a null row
+        [[4, 2, None, 9], [], None],  # plus an empty list
+    ],
+)
+@pytest.mark.parametrize(
+    "index",
+    [
+        None,  # default index
+        pd.Index(["a", "b", "c"]),
+        pd.MultiIndex.from_tuples(
+            [(0, "a"), (0, "b"), (1, "a")], names=["l0", "l1"]
+        ),
+    ],
+)
+@pytest.mark.parametrize("ascending", [True, False])
+@pytest.mark.parametrize("na_position", ["first", "last"])
+@pytest.mark.parametrize("ignore_index", [True, False])
+def test_sort_values(data, index, ascending, na_position, ignore_index):  # compares list.sort_values with per-row sorted()
+    key_func = functools.partial(key_func_builder, na_position=na_position)
+
+    ps = pd.Series(data, index=index)
+    gs = cudf.from_pandas(ps)
+
+    expected = ps.apply(  # build the expected result row by row on the host
+        lambda x: sorted(x, key=key_func, reverse=not ascending)  # NOTE(review): reverse=True also flips where the None sentinel lands
+        if x is not None
+        else None  # null rows stay null
+    )
+    if ignore_index:
+        expected.reset_index(drop=True, inplace=True)  # expected then carries a fresh default index
+    got = gs.list.sort_values(
+        ascending=ascending, na_position=na_position, ignore_index=ignore_index
+    )
+
+    assert_eq(expected, got)
-Original file line number
+Diff line change
@@ Expand Up / @@ -3571,6 +3571,7 @@ def sort_values( @@
 3
 4
 5
+            dtype: int64
             """
             if inplace:
@@ Expand Down @@