Skip to content

Commit

Permalink
Add a test
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed May 29, 2024
1 parent afd5522 commit 9f0c30b
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1984,6 +1984,18 @@ def test_from_arrow(nelem, data_type):
np.testing.assert_array_equal(s.to_pandas(), gs.to_numpy())


def test_from_arrow_chunked_categories():
    # Categories must be deduplicated when a dictionary-encoded column
    # arrives as multiple chunks that each carry the same dictionary.
    categories = pa.array(["foo", "bar", "baz"])
    codes = pa.array([0, 1, 0, 1, 2, 0, None, 2])
    chunk = pa.DictionaryArray.from_arrays(codes, categories)

    # The same chunk is used twice, so both chunks share identical
    # categories.
    arrow_table = pa.table({"a": pa.chunked_array([chunk, chunk])})
    gdf = cudf.DataFrame.from_arrow(arrow_table)

    # Compare as sorted lists: order is not checked, but any duplicated
    # category would change the length and fail the comparison.
    actual = sorted(gdf["a"].dtype.categories.to_arrow().to_pylist())
    expected = sorted(categories.to_pylist())
    assert actual == expected


@pytest.mark.parametrize("nelem", [0, 2, 3, 100, 1000])
@pytest.mark.parametrize("data_type", dtypes)
def test_to_arrow(nelem, data_type):
Expand Down

0 comments on commit 9f0c30b

Please sign in to comment.