rapidsai · rapids-bot · Jul 20, 2021 · Jul 13, 2021 · Jul 13, 2021 · Jul 13, 2021
@@ -2026,7 +2026,6 @@ def as_column(
         mask = bools_to_mask(as_column(mask).unary_operator("not"))
 
         data = data.set_mask(mask)
-
     else:
         try:
             data = as_column(
@@ -2098,6 +2097,17 @@ def as_column(
                 elif is_interval_dtype(dtype):
                     sr = pd.Series(arbitrary, dtype="interval")
                     data = as_column(sr, nan_as_null=nan_as_null, dtype=dtype)
+                elif (
+                    isinstance(arbitrary, Sequence)
+                    and len(arbitrary) > 0
+                    and any(
+                        cudf.utils.dtypes.is_column_like(arb)
+                        for arb in arbitrary
+                    )
+                ):
+                    return cudf.core.column.ListColumn.from_sequences(
+                        arbitrary
+                    )
                 else:
                     data = as_column(
                         _construct_array(arbitrary, dtype),

@@ -1,6 +1,7 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 import pickle
+from typing import Sequence, cast
 
 import numpy as np
 import pyarrow as pa
@@ -278,6 +279,46 @@ def leaves(self):
         else:
             return self.elements
 
+    @classmethod
+    def from_sequences(cls, arbitrary: Sequence[ColumnLike]):
+        """
+        Build a list column out of a sequence of column-like entries.
+
+        Each entry of ``arbitrary`` becomes one row of the result. An entry
+        that ``_is_null_host_scalar`` reports as null yields a masked-out
+        row of length 0; any other entry is converted with ``as_column``
+        and appended to the shared child data column.
+
+        Parameters
+        ----------
+        arbitrary : Sequence[ColumnLike]
+            The rows, in order. Must support ``len()`` and iteration.
+
+        Returns
+        -------
+        An instance of ``cls`` whose children are the offsets column and
+        the concatenated data column.
+        """
+        n_rows = len(arbitrary)
+        values = column.column_empty(0)
+        validity = []
+        row_sizes = []
+
+        # One pass: record validity and row length, and grow the child data.
+        for item in arbitrary:
+            is_valid = not cudf._lib.scalar._is_null_host_scalar(item)
+            validity.append(is_valid)
+            if is_valid:
+                values = values.append(as_column(item))
+                row_sizes.append(len(item))
+            else:
+                row_sizes.append(0)
+
+        # Offsets start as [0, len_0, len_1, ...] and are then run through
+        # a "sum" scan.
+        offsets = column.column_empty(row_count=n_rows + 1, dtype="int32")
+        offsets[0] = 0
+        offsets[1:] = row_sizes
+        offsets = cast(
+            cudf.core.column.NumericalColumn, offsets
+        )._apply_scan_op("sum")
+
+        # NOTE(review): null_count is passed as 0 even when ``validity``
+        # contains False entries -- confirm the constructor recomputes it
+        # from the mask.
+        return cls(
+            size=n_rows,
+            dtype=cudf.ListDtype(values.dtype),
+            mask=cudf._lib.transform.bools_to_mask(as_column(validity)),
+            offset=0,
+            null_count=0,
+            children=(offsets, values),
+        )
+
+
 
 class ListMethods(ColumnMethods):
     """

@@ -241,7 +241,7 @@ def __init__(
         if isinstance(data, dict):
             index = data.keys()
             data = column.as_column(
-                data.values(), nan_as_null=nan_as_null, dtype=dtype
+                list(data.values()), nan_as_null=nan_as_null, dtype=dtype
             )
 
         if data is None:

@@ -4,6 +4,7 @@
 import re
 from string import ascii_letters, digits
 
+import cupy as cp
 import numpy as np
 import pandas as pd
 import pytest
@@ -1203,3 +1204,29 @@ def test_explode(data, ignore_index, p_index):
             assert_eq(expect, got, check_dtype=False)
     else:
         assert_eq(expect, got, check_dtype=False)
+
+
+# Each case pairs a Python list of column-like / null entries with the
+# list-typed Series that constructing ``cudf.Series`` from it should equal.
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        # Two non-null Series of different lengths.
+        (
+            [cudf.Series([1, 2, 3]), cudf.Series([10, 20])],
+            cudf.Series([[1, 2, 3], [10, 20]]),
+        ),
+        # A None entry between Series, plus np.nan inside one of the rows.
+        (
+            [cudf.Series([1, 2, 3]), None, cudf.Series([10, 20, np.nan])],
+            cudf.Series([[1, 2, 3], None, [10, 20, np.nan]]),
+        ),
+        # cupy arrays as the column-like entries, with cudf.NA as the null.
+        (
+            [cp.array([5, 6]), cudf.NA, cp.array([1])],
+            cudf.Series([[5, 6], None, [1]]),
+        ),
+        # Several leading None entries before the only column-like entry.
+        (
+            [None, None, None, None, None, cudf.Series([10, 20])],
+            cudf.Series([None, None, None, None, None, [10, 20]]),
+        ),
+    ],
+)
+def test_nested_series_from_sequence_data(data, expected):
+    """Check that ``cudf.Series(data)`` compares equal to ``expected``."""
+    actual = cudf.Series(data)
+    assert_eq(actual, expected)
@@ -580,3 +580,17 @@ def test_groupby_agg_redirect(aggregations):
 )
 def test_is_supported(arg):
     assert _is_supported(arg, {"supported"}) is False
+
+
+def test_groupby_unique_lists():
+    """
+    Compare groupby ``unique`` results between dask-on-pandas, cudf and
+    dask_cudf for the same small frame.
+    """
+    frame = pd.DataFrame(
+        {"a": [0, 0, 0, 1, 1, 1], "b": [10, 10, 10, 7, 8, 9]}
+    )
+    dask_frame = dd.from_pandas(frame, 2)
+    cudf_frame = cudf.from_pandas(frame)
+    dask_cudf_frame = dask_cudf.from_cudf(cudf_frame, 2)
+
+    # dask-on-pandas versus dask_cudf, both computed.
+    from_dask = dask_frame.groupby("a").b.unique().compute()
+    from_dask_cudf = dask_cudf_frame.groupby("a").b.unique().compute()
+    dd.assert_eq(from_dask, from_dask_cudf)
+
+    # Plain cudf versus a fresh dask_cudf computation.
+    from_cudf = cudf_frame.groupby("a").b.unique()
+    dd.assert_eq(
+        from_cudf, dask_cudf_frame.groupby("a").b.unique().compute(),
+    )