rapidsai · rapids-bot · Sep 14, 2021 · Sep 9, 2021 · Sep 9, 2021 · Sep 9, 2021
@@ -235,7 +235,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
                     idx = idx[skiprows:]
                 if num_rows is not None:
                     idx = idx[:num_rows]
-            df.index = idx
+            df._index = idx
         elif set(index_col).issubset(column_names):
             index_data = df[index_col]
             actual_index_names = list(index_col_names.values())
@@ -250,7 +250,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
                     names=actual_index_names
                 )
             df.drop(columns=index_col, inplace=True)
-            df.index = idx
+            df._index = idx
         else:
             if use_pandas_metadata:
                 df.index.names = index_col

@@ -562,7 +562,7 @@ def _get_column_selection(self, arg):
 
 
 def _normalize_dtypes(df):
-    if len(df.columns) > 0:
+    if df._num_columns > 0:
         dtypes = df.dtypes.values.tolist()
         normalized_dtype = np.result_type(*dtypes)
         for name, col in df._data.items():

@@ -28,23 +28,22 @@ def read_metadata(*args, **kwargs):
         meta, stats, parts, index = ArrowDatasetEngine.read_metadata(
             *args, **kwargs
         )
+        new_meta = cudf.from_pandas(meta)
         if parts:
             # Re-set "object" dtypes align with pa schema
             set_object_dtypes_from_pa_schema(
-                meta, parts[0].get("common_kwargs", {}).get("schema", None),
+                new_meta,
+                parts[0].get("common_kwargs", {}).get("schema", None),
             )
 
         # If `strings_to_categorical==True`, convert objects to int32
         strings_to_cats = kwargs.get("strings_to_categorical", False)
-
-        new_meta = cudf.DataFrame(index=meta.index)
-        for col in meta.columns:
-            if meta[col].dtype == "O":
-                new_meta[col] = as_column(
-                    meta[col], dtype="int32" if strings_to_cats else "object"
-                )
-            else:
-                new_meta[col] = as_column(meta[col])
+        for col in new_meta._data.names:
+            if (
+                isinstance(new_meta._data[col], cudf.core.column.StringColumn)
+                and strings_to_cats
+            ):
+                new_meta._data[col] = new_meta._data[col].astype("int32")
 
         return (new_meta, stats, parts, index)
 
@@ -338,9 +337,11 @@ def set_object_dtypes_from_pa_schema(df, schema):
     # "object" dtypes to agree with a specific
     # pyarrow schema.
     if schema:
-        for name in df.columns:
-            if name in schema.names and df[name].dtype == "O":
-                df[name] = df[name].astype(
+        for name in df._data.names:
+            if name in schema.names and isinstance(
+                df._data[name], cudf.core.column.StringColumn
+            ):
+                df._data[name] = df._data[name].astype(
                     cudf_dtype_from_pa_type(schema.field(name).type)
                 )