rapidsai · rapids-bot · Jul 20, 2021 · Jul 13, 2021 · Jul 13, 2021 · Jul 13, 2021
@@ -2026,7 +2026,12 @@ def as_column(
         mask = bools_to_mask(as_column(mask).unary_operator("not"))
 
         data = data.set_mask(mask)
-
+    elif (
+        isinstance(arbitrary, list)
+        and len(arbitrary) > 0
+        and cudf.utils.dtypes.is_column_like(arbitrary[0])
+    ):
+        return _create_list_column_from_sequences_list(arbitrary)
     else:
         try:
             data = as_column(
@@ -2371,3 +2376,43 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
                 ) from e
             raise
     return col
+
+
+def _create_list_column_from_sequences_list(arbitrary: List[ColumnLike]):
+    """
+    Create a list column from a list of column-like sequences.
+
+    Each entry of ``arbitrary`` becomes one row of the resulting list
+    column. Entries after the first that are null host scalars become
+    null rows of length 0; every other entry is converted with
+    ``as_column`` and appended to the child data column.
+
+    The first entry is converted unconditionally (it is never treated as
+    null), and its converted column seeds the child data whose dtype is
+    used for the resulting ``ListDtype``.
+
+    Parameters
+    ----------
+    arbitrary : list of column-like
+        Non-empty list of sequences, optionally containing null scalars
+        after the first position.
+
+    Returns
+    -------
+    ListColumn
+        One row per entry of ``arbitrary``.
+    """
+    data_col = as_column(arbitrary[0])
+    lengths_col = [len(data_col)]
+    mask_col = [True]
+
+    # Build Data & Mask
+    for data in arbitrary[1:]:
+        if cudf._lib.scalar._is_null_host_scalar(data):
+            # Null row: contributes no child elements.
+            mask_col.append(False)
+            lengths_col.append(0)
+        else:
+            mask_col.append(True)
+            data_col = data_col.append(as_column(data))
+            lengths_col.append(len(data))
+
+    # Build offsets: a leading 0 followed by the row lengths, turned into
+    # cumulative offsets by the "sum" scan below.
+    offset_col = cudf.core.column.column_empty(
+        row_count=len(arbitrary) + 1, dtype="int32"
+    )
+    offset_col[0] = 0
+    offset_col[1:] = lengths_col
+    offset_col = cast(
+        cudf.core.column.NumericalColumn, offset_col
+    )._apply_scan_op("sum")
+
+    # Build ListColumn
+    res = cudf.core.column.ListColumn(
+        size=len(arbitrary),
+        dtype=cudf.ListDtype(data_col.dtype),
+        mask=cudf._lib.transform.bools_to_mask(as_column(mask_col)),
+        offset=0,
+        # Report the null rows recorded in the mask rather than a
+        # hard-coded 0, so the count agrees with the mask.
+        null_count=mask_col.count(False),
+        children=(offset_col, data_col),
+    )
+    return res
@@ -241,7 +241,7 @@ def __init__(
         if isinstance(data, dict):
             index = data.keys()
             data = column.as_column(
-                data.values(), nan_as_null=nan_as_null, dtype=dtype
+                list(data.values()), nan_as_null=nan_as_null, dtype=dtype
             )
 
         if data is None:

@@ -4,6 +4,7 @@
 import re
 from string import ascii_letters, digits
 
+import cupy as cp
 import numpy as np
 import pandas as pd
 import pytest
@@ -1203,3 +1204,25 @@ def test_explode(data, ignore_index, p_index):
             assert_eq(expect, got, check_dtype=False)
     else:
         assert_eq(expect, got, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        # All entries are Series.
+        (
+            [cudf.Series([1, 2, 3]), cudf.Series([10, 20])],
+            cudf.Series([[1, 2, 3], [10, 20]]),
+        ),
+        # A ``None`` entry between two Series.
+        (
+            [cudf.Series([1, 2, 3]), None, cudf.Series([10, 20, np.nan])],
+            cudf.Series([[1, 2, 3], None, [10, 20, np.nan]]),
+        ),
+        # CuPy arrays with a ``cudf.NA`` entry in between.
+        (
+            [cp.array([5, 6]), cudf.NA, cp.array([1])],
+            cudf.Series([[5, 6], None, [1]]),
+        ),
+    ],
+)
+def test_nested_series_from_sequence_data(data, expected):
+    """Build a Series from a list of sequences and compare to ``expected``."""
+    assert_eq(cudf.Series(data), expected)
@@ -7,12 +7,12 @@
 import dask
 from dask import dataframe as dd
 
-import cudf
-from cudf.core._compat import PANDAS_GE_120
-
 import dask_cudf
 from dask_cudf.groupby import _is_supported
 
+import cudf
+from cudf.core._compat import PANDAS_GE_120
+
 
 @pytest.mark.parametrize("aggregation", ["sum", "mean", "count", "min", "max"])
 def test_groupby_basic_aggs(aggregation):
@@ -580,3 +580,14 @@ def test_groupby_agg_redirect(aggregations):
 )
 def test_is_supported(arg):
     assert _is_supported(arg, {"supported"}) is False
+
+
+def test_groupby_unique_lists():
+    """Compare ``groupby("a").b.unique()`` between dask and dask_cudf."""
+    pdf = pd.DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": [10, 10, 10, 7, 8, 9]})
+    dask_pdf = dd.from_pandas(pdf, 2)
+    dask_gdf = dask_cudf.from_cudf(cudf.from_pandas(pdf), 2)
+
+    # Same two-partition layout on both sides; compute each, then compare.
+    from_pandas_result = dask_pdf.groupby("a").b.unique().compute()
+    from_cudf_result = dask_gdf.groupby("a").b.unique().compute()
+    dd.assert_eq(from_pandas_result, from_cudf_result)