rapidsai · rapids-bot · Jul 20, 2021 · Jul 13, 2021 · Jul 13, 2021 · Jul 13, 2021
@@ -2026,7 +2026,6 @@ def as_column(
         mask = bools_to_mask(as_column(mask).unary_operator("not"))
 
         data = data.set_mask(mask)
-
     else:
         try:
             data = as_column(
@@ -2098,6 +2097,15 @@ def as_column(
                 elif is_interval_dtype(dtype):
                     sr = pd.Series(arbitrary, dtype="interval")
                     data = as_column(sr, nan_as_null=nan_as_null, dtype=dtype)
+                elif (
+                    isinstance(arbitrary, list)
+                    and len(arbitrary) > 0
+                    and any(
+                        cudf.utils.dtypes.is_column_like(arb)
+                        for arb in arbitrary
+                    )
+                ):
+                    return _create_list_column_from_sequences_list(arbitrary)
                 else:
                     data = as_column(
                         _construct_array(arbitrary, dtype),
@@ -2371,3 +2379,46 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
                 ) from e
             raise
     return col
+
+
+def _create_list_column_from_sequences_list(arbitrary: List[ColumnLike]):
+    """
+    Create a list column for list of column-like sequences
+    """
+    if cudf.utils.dtypes.is_column_like(arbitrary[0]):
+        data_col = as_column(arbitrary[0])
+        mask_col = [True]
+    else:
+        data_col = column_empty(row_count=0)
+        mask_col = [False]
+
+    lengths_col = [len(data_col)]
+
+    # Build Data & Mask
+    for data in arbitrary[1:]:
+        if cudf._lib.scalar._is_null_host_scalar(data):
+            mask_col.append(False)
+            lengths_col.append(0)
+        else:
+            mask_col.append(True)
+            data_col = data_col.append(as_column(data))
+            lengths_col.append(len(data))
+
+    # Build offsets
+    offset_col = column_empty(row_count=len(arbitrary) + 1, dtype="int32")
+    offset_col[0] = 0
+    offset_col[1:] = lengths_col
+    offset_col = cast(
+        cudf.core.column.NumericalColumn, offset_col
+    )._apply_scan_op("sum")
+
+    # Build ListColumn
+    res = cudf.core.column.ListColumn(
+        size=len(arbitrary),
+        dtype=cudf.ListDtype(data_col.dtype),
+        mask=cudf._lib.transform.bools_to_mask(as_column(mask_col)),
+        offset=0,
+        null_count=0,
+        children=(offset_col, data_col),
+    )
+    return res
@@ -241,7 +241,7 @@ def __init__(
         if isinstance(data, dict):
             index = data.keys()
             data = column.as_column(
-                data.values(), nan_as_null=nan_as_null, dtype=dtype
+                list(data.values()), nan_as_null=nan_as_null, dtype=dtype
             )
 
         if data is None:

@@ -4,6 +4,7 @@
 import re
 from string import ascii_letters, digits
 
+import cupy as cp
 import numpy as np
 import pandas as pd
 import pytest
@@ -1203,3 +1204,29 @@ def test_explode(data, ignore_index, p_index):
             assert_eq(expect, got, check_dtype=False)
     else:
         assert_eq(expect, got, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        (
+            [cudf.Series([1, 2, 3]), cudf.Series([10, 20])],
+            cudf.Series([[1, 2, 3], [10, 20]]),
+        ),
+        (
+            [cudf.Series([1, 2, 3]), None, cudf.Series([10, 20, np.nan])],
+            cudf.Series([[1, 2, 3], None, [10, 20, np.nan]]),
+        ),
+        (
+            [cp.array([5, 6]), cudf.NA, cp.array([1])],
+            cudf.Series([[5, 6], None, [1]]),
+        ),
+        (
+            [None, None, None, None, None, cudf.Series([10, 20])],
+            cudf.Series([None, None, None, None, None, [10, 20]]),
+        ),
+    ],
+)
+def test_nested_series_from_sequence_data(data, expected):
+    actual = cudf.Series(data)
+    assert_eq(actual, expected)
@@ -580,3 +580,17 @@ def test_groupby_agg_redirect(aggregations):
 )
 def test_is_supported(arg):
     assert _is_supported(arg, {"supported"}) is False
+
+
+def test_groupby_unique_lists():
+    df = pd.DataFrame({"a": [0, 0, 0, 1, 1, 1], "b": [10, 10, 10, 7, 8, 9]})
+    ddf = dd.from_pandas(df, 2)
+    gdf = cudf.from_pandas(df)
+    gddf = dask_cudf.from_cudf(gdf, 2)
+    dd.assert_eq(
+        ddf.groupby("a").b.unique().compute(),
+        gddf.groupby("a").b.unique().compute(),
+    )
+    dd.assert_eq(
+        gdf.groupby("a").b.unique(), gddf.groupby("a").b.unique().compute(),
+    )