rapidsai · rapids-bot · Jan 20, 2021 · Jan 16, 2021 · Jan 18, 2021 · Jan 18, 2021
@@ -1,4 +1,5 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+
 import copy
 import functools
 import operator
@@ -2056,20 +2057,44 @@ def from_arrow(cls, data):
         else:
             result = cudf_category_frame
 
-        # In a scenario where column is of type list/other non
-        # pandas types, there will be no pandas metadata associated with
-        # given arrow table as those types can only originate from
-        # arrow.
+        # There are some special cases that need to be handled
+        # based on metadata.
         if pandas_dtypes:
             for name in result._data.names:
-                if pandas_dtypes[name] == "categorical":
+                dtype = None
+                if (
+                    len(result._data[name]) == 0
+                    and pandas_dtypes[name] == "categorical"
+                ):
+                    # When pandas_dtype is a categorical column and the size
+                    # of the column is 0 (i.e., empty), we will have an
+                    # int8 column in result._data[name] returned by libcudf,
+                    # which needs to be type-casted to 'category' dtype.
                     dtype = "category"
-                elif pandas_dtypes[name] == "bool":
-                    dtype = pandas_dtypes[name]
-                else:
+                elif (
+                    pandas_dtypes[name] == "empty"
+                    and np_dtypes[name] == "object"
+                ):
+                    # When a string column has all null values, pandas_dtype
+                    # is specified as 'empty' and np_dtypes as 'object',
+                    # hence handling this special case to type-cast the empty
+                    # float column to str column.
                     dtype = np_dtypes[name]
+                elif pandas_dtypes[
+                    name
+                ] == "object" and cudf.utils.dtypes.is_struct_dtype(
+                    np_dtypes[name]
+                ):
+                    # In case of a struct column, libcudf is not aware of the
+                    # names of the struct fields, hence the fields must be
+                    # renamed using the field names extracted from the arrow
+                    # struct type.
+                    result._data[name] = result._data[name]._rename_fields(
+                        [field.name for field in data[name].type]
+                    )
 
-                result._data[name] = result._data[name].astype(dtype)
+                if dtype is not None:
+                    result._data[name] = result._data[name].astype(dtype)
 
         result = libcudf.table.Table(
             result._data.select_by_label(column_names)

@@ -1,4 +1,5 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+
 import array as arr
 import io
 import operator
@@ -8191,3 +8192,61 @@ def test_agg_for_dataframe_with_string_columns(aggs):
         ),
     ):
         gdf.agg(aggs)
+
+
+@pytest.mark.parametrize(
+    "gdf",
+    [
+        gd.DataFrame({"a": [[1], [2], [3]]}),  # list rows only, no None
+        gd.DataFrame(
+            {
+                "left-a": [0, 1, 2],
+                "a": [[1], None, [3]],  # a None row between list rows
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+        gd.DataFrame(
+            {
+                "left-a": [[], None, None],  # an empty list and None rows
+                "a": [[1], None, [3]],
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+    ],
+)
+def test_dataframe_roundtrip_arrow_list_dtype(gdf):
+    table = gdf.to_arrow()
+    expected = gd.DataFrame.from_arrow(table)
+    # `expected` is the frame rebuilt from arrow; it must equal the input.
+    assert_eq(gdf, expected)
+
+
+@pytest.mark.parametrize(
+    "gdf",
+    [
+        gd.DataFrame({"a": [{"one": 3, "two": 4, "three": 10}]}),  # one row
+        gd.DataFrame(
+            {
+                "left-a": [0, 1, 2],
+                "a": [{"x": 0.23, "y": 43}, None, {"x": 23.9, "y": 4.3}],
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+        gd.DataFrame(
+            {
+                "left-a": [{"a": 1}, None, None],  # dict row then None rows
+                "a": [
+                    {"one": 324, "two": 23432, "three": 324},
+                    None,  # a None row between two dict rows
+                    {"one": 3.24, "two": 1, "three": 324},
+                ],
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+    ],
+)
+def test_dataframe_roundtrip_arrow_struct_dtype(gdf):
+    table = gdf.to_arrow()
+    expected = gd.DataFrame.from_arrow(table)
+    # `expected` is the frame rebuilt from arrow; it must equal the input.
+    assert_eq(gdf, expected)