diff --git a/python/cudf/benchmarks/API/bench_dataframe.py b/python/cudf/benchmarks/API/bench_dataframe.py
index 59d73015962..ba243eb6a7c 100644
--- a/python/cudf/benchmarks/API/bench_dataframe.py
+++ b/python/cudf/benchmarks/API/bench_dataframe.py
@@ -4,6 +4,7 @@
 
 import string
 
+import numba.cuda
 import numpy
 import pytest
 import pytest_cases
@@ -16,6 +17,12 @@ def bench_construction(benchmark, N):
     benchmark(cudf.DataFrame, {None: cupy.random.rand(N)})
 
 
+@pytest.mark.parametrize("N", [100, 100_000])
+@pytest.mark.pandas_incompatible
+def bench_construction_numba_device_array(benchmark, N):
+    benchmark(cudf.DataFrame, numba.cuda.to_device(numpy.ones((100, N))))
+
+
 @benchmark_with_object(cls="dataframe", dtype="float", cols=6)
 @pytest.mark.parametrize(
     "expr", ["a+b", "a+b+c+d+e", "a / (sin(a) + cos(b)) * tanh(d*e*f)"]
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 32e6aade65b..7e0d8ced595 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1843,8 +1843,7 @@ def as_column(
         else:
             mask = None
 
-        arbitrary = cupy.asarray(arbitrary)
-        arbitrary = cupy.ascontiguousarray(arbitrary)
+        arbitrary = cupy.asarray(arbitrary, order="C")
 
         data = as_buffer(arbitrary, exposed=cudf.get_option("copy_on_write"))
         col = build_column(data, dtype=arbitrary.dtype, mask=mask)
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1d7136e61e3..dca0c0b821a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -782,7 +782,6 @@ def __init__(
                 )
             elif hasattr(data, "__cuda_array_interface__"):
                 arr_interface = data.__cuda_array_interface__
-
                 # descr is an optional field of the _cuda_ary_iface_
                 if "descr" in arr_interface:
                     if len(arr_interface["descr"]) == 1:
@@ -5835,17 +5834,18 @@ def from_records(
     @_performance_tracking
     def _from_arrays(
         cls,
-        data: np.ndarray | cupy.ndarray,
+        data,
         index=None,
         columns=None,
         nan_as_null=False,
     ):
-        """Convert a numpy/cupy array to DataFrame.
+        """
+        Convert an object implementing an array interface to DataFrame.
 
         Parameters
         ----------
-        data : numpy/cupy array of ndim 1 or 2,
-            dimensions greater than 2 are not supported yet.
+        data : object of ndim 1 or 2,
+            Object implementing ``__array_interface__`` or ``__cuda_array_interface__``
         index : Index or array-like
             Index to use for resulting frame. Will default to
             RangeIndex if no indexing information part of input data and
@@ -5857,13 +5857,24 @@ def _from_arrays(
         -------
         DataFrame
         """
-        if data.ndim != 1 and data.ndim != 2:
+        array_data: np.ndarray | cupy.ndarray
+        if hasattr(data, "__cuda_array_interface__"):
+            array_data = cupy.asarray(data, order="F")
+        elif hasattr(data, "__array_interface__"):
+            array_data = np.asarray(data, order="F")
+        else:
             raise ValueError(
-                f"records dimension expected 1 or 2 but found: {data.ndim}"
+                "data must be an object implementing __cuda_array_interface__ or __array_interface__"
+            )
+
+        if array_data.ndim not in {1, 2}:
+            raise ValueError(
+                f"records dimension expected 1 or 2 but found: {array_data.ndim}"
             )
 
-        if data.ndim == 2:
-            num_cols = data.shape[1]
+        # ``data`` only promises an array interface, not ``.ndim``; use the normalized array.
+        if array_data.ndim == 2:
+            num_cols = array_data.shape[1]
         else:
             # Since we validate ndim to be either 1 or 2 above,
             # this case can be assumed to be ndim == 1.
@@ -5881,14 +5892,14 @@ def _from_arrays(
                 raise ValueError("Duplicate column names are not allowed")
             names = columns
 
-        if data.ndim == 2:
+        if array_data.ndim == 2:
             ca_data = {
-                k: column.as_column(data[:, i], nan_as_null=nan_as_null)
+                k: column.as_column(array_data[:, i], nan_as_null=nan_as_null)
                 for i, k in enumerate(names)
             }
-        elif data.ndim == 1:
+        elif array_data.ndim == 1:
             ca_data = {
-                names[0]: column.as_column(data, nan_as_null=nan_as_null)
+                names[0]: column.as_column(array_data, nan_as_null=nan_as_null)
             }
 
         if index is not None: