diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 672e663d316..9d14d4bde7f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -907,14 +907,23 @@ def _init_from_dict_like(
         if index is None:
             num_rows = 0
             if data:
-                col_name = next(iter(data))
-                if is_scalar(data[col_name]):
-                    num_rows = num_rows or 1
-                else:
-                    data[col_name] = column.as_column(
-                        data[col_name], nan_as_null=nan_as_null
-                    )
-                    num_rows = len(data[col_name])
+                # Materialize every non-scalar value as a column so its
+                # length can be validated; scalars are left untouched.
+                # Only array-like lengths are tracked, so a length-1
+                # array is never mistaken for a scalar.
+                converted = {}
+                array_lengths = set()
+                for key, value in data.items():
+                    if not is_scalar(value):
+                        value = as_column(value, nan_as_null=nan_as_null)
+                        array_lengths.add(len(value))
+                    converted[key] = value
+                data = converted
+                if len(array_lengths) > 1:
+                    raise ValueError("All arrays must be the same length")
+                # All-scalar input produces a single row.
+                num_rows = array_lengths.pop() if array_lengths else 1
+
             self._index = RangeIndex(0, num_rows)
         else:
             self._index = as_index(index)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 6a79555d43e..d71d0954624 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -10043,3 +10043,43 @@ def test_dataframe_from_arrow_slice():
     actual = cudf.DataFrame.from_arrow(table_slice)
 
     assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {"a": [1, 2, 3], "b": ["x", "y", "z"], "c": 4},
+        {"c": 4, "a": [1, 2, 3], "b": ["x", "y", "z"]},
+        {"a": [1, 2, 3], "c": 4},
+    ],
+)
+def test_dataframe_init_from_scalar_and_lists(data):
+    actual = cudf.DataFrame(data)
+    expected = pd.DataFrame(data)
+
+    assert_eq(expected, actual)
+
+
+def test_dataframe_init_length_error():
+    assert_exceptions_equal(
+        lfunc=pd.DataFrame,
+        rfunc=cudf.DataFrame,
+        lfunc_args_and_kwargs=(
+            [],
+            {"data": {"a": [1, 2, 3], "b": ["x", "y", "z", "z"], "c": 4}},
+        ),
+        rfunc_args_and_kwargs=(
+            [],
+            {"data": {"a": [1, 2, 3], "b": ["x", "y", "z", "z"], "c": 4}},
+        ),
+    )
+
+
+def test_dataframe_init_length_error_with_length_one_array():
+    data = {"a": [1], "b": ["x", "y", "z"]}
+    assert_exceptions_equal(
+        lfunc=pd.DataFrame,
+        rfunc=cudf.DataFrame,
+        lfunc_args_and_kwargs=([], {"data": data}),
+        rfunc_args_and_kwargs=([], {"data": data}),
+    )