Fix bug in dask_cudf.read_parquet for index=False #9453

Merged · 1 commit · Oct 15, 2021
10 changes: 7 additions & 3 deletions python/dask_cudf/dask_cudf/io/parquet.py
@@ -228,9 +228,8 @@ def read_partition(

     if index and (index[0] in df.columns):
         df = df.set_index(index[0])
-    elif index is False and set(df.index.names).issubset(columns):
-        # If index=False, we need to make sure all of the
-        # names in `columns` are actually in `df.columns`
+    elif index is False and df.index.names != (None,):
+        # If index=False, we shouldn't have a named index
         df.reset_index(inplace=True)

     return df
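
For context: the old condition only reset the index when its names were a subset of the requested columns, which misses a named index that was not requested; the new condition resets any named index under index=False and leaves a default unnamed RangeIndex alone. A minimal sketch of the two index states, using standalone cudf (not part of this PR) and relying on Index.names being a tuple, as the new comparison implies:

import cudf

df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# Default RangeIndex: no name, so index=False has nothing to reset.
assert df.index.names == (None,)

# Named index: under index=False it should become a regular column again.
df = df.set_index("a")
assert df.index.names == ("a",)
df.reset_index(inplace=True)
assert "a" in df.columns
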
@@ -331,6 +330,11 @@ def set_object_dtypes_from_pa_schema(df, schema):
     # pyarrow schema.
     if schema:
         for col_name, col in df._data.items():
+            if col_name is None:
+                # Pyarrow cannot handle `None` as a field name.
+                # However, this should be a simple range index that
+                # we can ignore anyway
+                continue
             typ = cudf_dtype_from_pa_type(schema.field(col_name).type)
             if (
                 col_name in schema.names
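
The new early continue guards the schema lookup that follows it: an unnamed index can surface in the frame's internal column mapping under a None key, and None is not a valid pyarrow field name. A minimal illustration with plain pyarrow (not part of this PR):

import pyarrow as pa

schema = pa.schema([("a", pa.int64())])
print(schema.field("a").type)  # int64

# A field lookup needs a string name (or an integer position);
# schema.field(None) raises rather than returning anything useful.
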
11 changes: 11 additions & 0 deletions python/dask_cudf/dask_cudf/io/tests/test_parquet.py
@@ -86,6 +86,17 @@ def test_roundtrip_from_dask_index_false(tmpdir):
     dd.assert_eq(ddf.reset_index(drop=False), ddf2)


+def test_roundtrip_from_dask_none_index_false(tmpdir):
+    tmpdir = str(tmpdir)
+    path = os.path.join(tmpdir, "test.parquet")
+
+    df2 = ddf.reset_index(drop=True).compute()
+    df2.to_parquet(path, engine="pyarrow")
+
+    ddf3 = dask_cudf.read_parquet(path, index=False)
+    dd.assert_eq(df2, ddf3)
+
Comment on lines +90 to +97
Contributor:
Suggested change:
-    tmpdir = str(tmpdir)
-    path = os.path.join(tmpdir, "test.parquet")
-    df2 = ddf.reset_index(drop=True).compute()
-    df2.to_parquet(path, engine="pyarrow")
-    ddf3 = dask_cudf.read_parquet(path, index=False)
-    dd.assert_eq(df2, ddf3)
+    bytes_buf = BytesIO()
+    df2 = ddf.reset_index(drop=True).compute()
+    df2.to_parquet(bytes_buf, engine="pyarrow")
+    ddf3 = dask_cudf.read_parquet(bytes_buf, index=False)
+    dd.assert_eq(df2, ddf3)

Can we use BytesIO instead of interacting with the filesystem?

Member Author:

Ooo - I like your thinking here, but we cannot pass a BytesIO object to dask_cudf.read_parquet without making much larger changes :)

Contributor:

Ah, I see. I thought dask_cudf.read_parquet accepted bytes-like objects, similar to pyarrow/cudf/pandas. Then let's not do it now.
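
For reference, cudf itself can read from an in-memory buffer, which is presumably what motivated the suggestion; it is the path-based, partition-planning dask_cudf reader that cannot. A minimal sketch outside this PR, assuming pandas, pyarrow, and cudf are installed:

from io import BytesIO

import cudf
import pandas as pd

buf = BytesIO()
pd.DataFrame({"a": [1, 2, 3]}).to_parquet(buf, engine="pyarrow")
buf.seek(0)

# cudf reads straight from the file-like object...
gdf = cudf.read_parquet(buf)
print(gdf)

# ...whereas dask_cudf.read_parquet expects a path (or list of paths)
# it can split into partitions, hence the filesystem round trip in the test.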



 @pytest.mark.parametrize("write_meta", [True, False])
 def test_roundtrip_from_dask_cudf(tmpdir, write_meta):
     tmpdir = str(tmpdir)