Skip to content
/ cudf Public
forked from rapidsai/cudf

Commit

Permalink
Allow pd.ArrowDtype in cudf.from_pandas (rapidsai#13465)
Browse files (browse the repository at this point in the history)
Closes rapidsai#13003.

Allow pandas objects with `ArrowDtype` to be accepted by `cudf.from_pandas`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: rapidsai#13465
  • Loading branch information
mroeschke authored Jun 14, 2023
1 parent 649cf5e commit 02be87b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
11 changes: 7 additions & 4 deletions python/cudf/cudf/core/column/column.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1990,10 +1990,13 @@ def as_column(
return col

elif isinstance(arbitrary, (pd.Series, pd.Categorical)):
if isinstance(arbitrary, pd.Series) and isinstance(
arbitrary.array, pd.core.arrays.masked.BaseMaskedArray
):
return as_column(arbitrary.array)
if isinstance(arbitrary, pd.Series):
if isinstance(
arbitrary.array, pd.core.arrays.masked.BaseMaskedArray
):
return as_column(arbitrary.array)
elif PANDAS_GE_150 and isinstance(arbitrary.dtype, pd.ArrowDtype):
return as_column(pa.array(arbitrary.array, from_pandas=True))
if is_categorical_dtype(arbitrary):
data = as_column(pa.array(arbitrary, from_pandas=True))
elif is_interval_dtype(arbitrary.dtype):
Expand Down
20 changes: 16 additions & 4 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1694,18 +1694,30 @@ def test_nonmatching_index_setitem(nrows):
assert_eq(gdf["c"].to_pandas(), gdf_series.to_pandas())


def test_from_pandas():
df = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0])
@pytest.mark.parametrize(
"dtype",
[
"int",
pytest.param(
"int64[pyarrow]",
marks=pytest.mark.skipif(
not PANDAS_GE_150, reason="pyarrow support only in >=1.5"
),
),
],
)
def test_from_pandas(dtype):
df = pd.DataFrame({"x": [1, 2, 3]}, index=[4.0, 5.0, 6.0], dtype=dtype)
gdf = cudf.DataFrame.from_pandas(df)
assert isinstance(gdf, cudf.DataFrame)

assert_eq(df, gdf)
assert_eq(df, gdf, check_dtype="pyarrow" not in dtype)

s = df.x
gs = cudf.Series.from_pandas(s)
assert isinstance(gs, cudf.Series)

assert_eq(s, gs)
assert_eq(s, gs, check_dtype="pyarrow" not in dtype)


@pytest.mark.parametrize("dtypes", [int, float])
Expand Down

0 comments on commit 02be87b

Please sign in to comment.