Skip to content

Commit

Permalink
Correct pandas metadata returned by pyarrow for list/struct dtypes
Browse files Browse the repository at this point in the history
  • Loading branch information
shwina committed Oct 6, 2020
1 parent 18877bd commit 5a17311
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
12 changes: 9 additions & 3 deletions python/cudf/cudf/_lib/parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,15 @@ cpdef generate_pandas_metadata(Table table, index):
types,
)

-    md = metadata[b'pandas']
-    json_str = md.decode("utf-8")
-    return json_str
+    md_dict = json.loads(metadata[b"pandas"])
+
+    # correct metadata for list and struct types
+    for col_meta in md_dict["columns"]:
+        if col_meta["numpy_type"] in ("list", "struct"):
+            col_meta["numpy_type"] = "object"
+
+    return json.dumps(md_dict)


cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
skiprows=None, num_rows=None, strings_to_categorical=False,
Expand Down
3 changes: 0 additions & 3 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,6 @@ def __repr__(self):
else:
return f"ListDtype({self.element_type})"

-    def __str__(self):
-        return "object"


class StructDtype(ExtensionDtype):

Expand Down

0 comments on commit 5a17311

Please sign in to comment.