Skip to content

Commit

Permalink
Fix DataFrame initialization from list of dicts (#6632)
Browse files Browse the repository at this point in the history
closes #6628
* support instantiation from a list of dicts

Co-authored-by: GALI PREM SAGAR <[email protected]>
Co-authored-by: GALI PREM SAGAR <[email protected]>
  • Loading branch information
brandon-b-miller and galipremsagar authored Nov 13, 2020
1 parent 1d5eec6 commit bd564a0
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
- PR #6728 Fix cudf python docs and associated build warnings
- PR #6732 Fix cuDF benchmarks build with static Arrow lib and fix rapids-compose cuDF JNI build
- PR #6742 Fix concat bug in dask_cudf Series/Index creation
- PR #6632 Fix DataFrame initialization from list of dicts


# cuDF 0.16.0 (21 Oct 2020)
Expand Down
24 changes: 15 additions & 9 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,18 +398,24 @@ def _init_from_list_like(self, data, index=None, columns=None):
index = as_index(index)

self._index = as_index(index)
data = list(itertools.zip_longest(*data))

if columns is not None and len(data) == 0:
data = [
cudf.core.column.column_empty(row_count=0, dtype=None)
for _ in columns
]
# list-of-dicts case
if len(data) > 0 and isinstance(data[0], dict):
data = DataFrame.from_pandas(pd.DataFrame(data))
self._data = data._data
else:
data = list(itertools.zip_longest(*data))

for col_name, col in enumerate(data):
self._data[col_name] = column.as_column(col)
if columns is not None and len(data) == 0:
data = [
cudf.core.column.column_empty(row_count=0, dtype=None)
for _ in columns
]

self.columns = columns
for col_name, col in enumerate(data):
self._data[col_name] = column.as_column(col)
if columns:
self.columns = columns

def _init_from_dict_like(self, data, index=None, columns=None):
data = data.copy()
Expand Down
18 changes: 18 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7892,6 +7892,24 @@ def test_dataframe_to_pandas_nullable_dtypes(df, expected_pdf):
assert_eq(actual_pdf, expected_pdf)


@pytest.mark.parametrize(
    "data",
    [
        # Every record carries the same keys.
        [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}],
        # Same keys, with some values explicitly set to None.
        [{"a": 1, "b": 2, "c": None}, {"a": None, "b": 5, "c": 6}],
        # Second record introduces a key the first one lacks.
        [{"a": 1, "b": 2}, {"a": 1, "b": 5, "c": 6}],
        # Records whose key sets only partially overlap.
        [{"a": 1, "b": 2}, {"b": 5, "c": 6}],
        # An empty record followed by a populated one.
        [{}, {"a": 1, "b": 5, "c": 6}],
        # Integer values in one record, float values in the other.
        [{"a": 1, "b": 2, "c": 3}, {"a": 4.5, "b": 5.5, "c": 6.5}],
    ],
)
def test_dataframe_init_from_list_of_dicts(data):
    """Check that a list of dicts builds the same frame in cudf as in pandas.

    The same ``data`` is passed to ``pd.DataFrame`` and ``gd.DataFrame``
    and the two results are compared with ``assert_eq``.
    """
    # pandas supplies the reference result for each parametrized input.
    expected_pdf = pd.DataFrame(data)
    actual_gdf = gd.DataFrame(data)

    assert_eq(expected_pdf, actual_gdf)


def test_dataframe_pipe():
pdf = pd.DataFrame()
gdf = gd.DataFrame()
Expand Down

0 comments on commit bd564a0

Please sign in to comment.