Skip to content

Commit

Permalink
Handle some zero-sized corner cases in dlpack interop (NVIDIA#11449)
Browse files Browse the repository at this point in the history
Previously, we did not explicitly check for zero-sized inputs in to/from_dlpack.
cupy>=11 changes behaviour in how it provides information for zero-sized arrays
(strides of empty tensors were previously reported as one, but now are zero).
This broke some of the validity checking in `from_dlpack`. To handle such cases,
check the shape of the incoming tensor as well as strides.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Benjamin Zaitlen (https://github.com/quasiben)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Bradley Dice (https://github.com/bdice)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: rapidsai/cudf#11449
  • Loading branch information
wence- authored Sep 6, 2022
1 parent ba4f715 commit bc7109e
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 36 deletions.
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- cxx-compiler
- clang=11.1.0
- clang-tools=11.1.0
- cupy>=9.5.0,<11.0.0a0
- cupy>=9.5.0,<12.0.0a0
- rmm=22.10.*
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ requirements:
- python
- typing_extensions
- pandas >=1.0,<1.5.0dev0
- cupy >=9.5.0,<11.0.0a0
- cupy >=9.5.0,<12.0.0a0
- numba >=0.54
- numpy
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
Expand Down
28 changes: 16 additions & 12 deletions cpp/src/interop/dlpack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,18 +152,22 @@ std::unique_ptr<table> from_dlpack(DLManagedTensor const* managed_tensor,

// We only support 1D and 2D tensors with some restrictions on layout
if (tensor.ndim == 1) {
// 1D tensors must have dense layout (strides == nullptr <=> dense row-major)
CUDF_EXPECTS(nullptr == tensor.strides || tensor.strides[0] == 1,
// 1D tensors must have dense layout (strides == nullptr <=> dense layout), or have shape (0,)
CUDF_EXPECTS(nullptr == tensor.strides || tensor.strides[0] == 1 || tensor.shape[0] == 0,
"from_dlpack of 1D DLTensor only for unit-stride data");
} else if (tensor.ndim == 2) {
// 2D tensors must have column-major layout and the fastest dimension must have dense layout
CUDF_EXPECTS((
// 1D tensor reshaped into (N, 1) is fine
tensor.shape[1] == 1 && (nullptr == tensor.strides || tensor.strides[0] == 1))
// General case
|| (nullptr != tensor.strides && tensor.strides[0] == 1 &&
tensor.strides[1] >= tensor.shape[0]),
"from_dlpack of 2D DLTensor only for column-major unit-stride data");
CUDF_EXPECTS(
// Empty array is fine. If ncols == 0 then we get an empty dataframe
// irrespective of nrows, which is slightly different behaviour from
// cudf.DataFrame(np.empty((3, 0))) because there's no way to communicate
// the index information out with a table view if no columns exist.
(tensor.shape[0] == 0 || tensor.shape[1] == 0)
// (N, 1) is fine as long as the 1D array has dense layout
|| (tensor.shape[1] == 1 && (nullptr == tensor.strides || tensor.strides[0] == 1))
// Column major is fine as long as the fastest dimension has dense layout
|| (nullptr != tensor.strides && tensor.strides[0] == 1 &&
tensor.strides[1] >= tensor.shape[0]),
"from_dlpack of 2D DLTensor only for column-major unit-stride data");
} else {
CUDF_FAIL("DLTensor must be 1D or 2D");
}
Expand Down Expand Up @@ -217,7 +221,7 @@ DLManagedTensor* to_dlpack(table_view const& input,
{
auto const num_rows = input.num_rows();
auto const num_cols = input.num_columns();
if (num_rows == 0) { return nullptr; }
if (num_rows == 0 && num_cols == 0) { return nullptr; }

// Ensure that type is convertible to DLDataType
data_type const type = input.column(0).type();
Expand Down Expand Up @@ -245,7 +249,7 @@ DLManagedTensor* to_dlpack(table_view const& input,
if (tensor.ndim > 1) {
tensor.shape[1] = num_cols;
tensor.strides = context->strides;
tensor.strides[0] = 1;
tensor.strides[0] = num_rows > 1 ? 1 : 0;
tensor.strides[1] = num_rows;
}

Expand Down
16 changes: 14 additions & 2 deletions cpp/tests/interop/dlpack_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class DLPackUntypedTests : public BaseFixture {
TEST_F(DLPackUntypedTests, EmptyTableToDlpack)
{
cudf::table_view empty(std::vector<cudf::column_view>{});
// No type information to construct a correct empty dlpack object
EXPECT_EQ(nullptr, cudf::to_dlpack(empty));
}

Expand All @@ -75,7 +76,18 @@ TEST_F(DLPackUntypedTests, EmptyColsToDlpack)
fixed_width_column_wrapper<int32_t> col1({});
fixed_width_column_wrapper<int32_t> col2({});
cudf::table_view input({col1, col2});
EXPECT_EQ(nullptr, cudf::to_dlpack(input));
unique_managed_tensor tensor(cudf::to_dlpack(input));
validate_dtype<int32_t>(tensor->dl_tensor.dtype);
EXPECT_NE(nullptr, tensor);
EXPECT_EQ(nullptr, tensor->dl_tensor.data);
EXPECT_EQ(2, tensor->dl_tensor.ndim);
EXPECT_EQ(0, tensor->dl_tensor.strides[0]);
EXPECT_EQ(0, tensor->dl_tensor.strides[1]);
EXPECT_EQ(0, tensor->dl_tensor.shape[0]);
EXPECT_EQ(2, tensor->dl_tensor.shape[1]);
EXPECT_EQ(kDLCUDA, tensor->dl_tensor.device.device_type);
auto result = cudf::from_dlpack(tensor.get());
CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view());
}

TEST_F(DLPackUntypedTests, NullTensorFromDlpack)
Expand Down Expand Up @@ -481,6 +493,6 @@ TYPED_TEST(DLPackNumericTests, FromDlpackEmpty1D)
cudf::table_view input(std::vector<cudf::column_view>{});
unique_managed_tensor tensor(cudf::to_dlpack(input));

// Verify that from_dlpack(to_dlpack(input)) == input
EXPECT_EQ(nullptr, tensor.get());
EXPECT_THROW(cudf::from_dlpack(tensor.get()), cudf::logic_error);
}
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ def values(self):
[4, 2],
[5, 1]])
>>> type(midx.values)
<class 'cupy._core.core.ndarray'>
<class 'cupy...ndarray'>
"""
return self.to_frame(index=False).values

Expand Down
3 changes: 0 additions & 3 deletions python/cudf/cudf/io/dlpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,6 @@ def to_dlpack(cudf_obj):
cuDF to_dlpack() produces column-major (Fortran order) output. If the
output tensor needs to be row major, transpose the output of this function.
"""
if len(cudf_obj) == 0:
raise ValueError("Cannot create DLPack tensor of 0 size")

if isinstance(cudf_obj, (cudf.DataFrame, cudf.Series, cudf.BaseIndex)):
gdf = cudf_obj
elif isinstance(cudf_obj, ColumnBase):
Expand Down
9 changes: 6 additions & 3 deletions python/cudf/cudf/tests/test_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ def test_buffer_from_cuda_iface_contiguous(data):
def test_buffer_from_cuda_iface_dtype(data, dtype):
data = data.astype(dtype)
buf = as_device_buffer_like(data)
ary = cp.array(buf).flatten().view("uint8")
assert (ary == buf).all()
got = cp.array(buf).reshape(-1).view("uint8")
expect = data.reshape(-1).view("uint8")
assert (expect == got).all()


@pytest.mark.parametrize("creator", [Buffer, as_device_buffer_like])
Expand Down Expand Up @@ -83,7 +84,9 @@ def test_buffer_repr(size, expect):
def test_buffer_slice(idx):
ary = cp.arange(arr_len, dtype="uint8")
buf = as_device_buffer_like(ary)
assert (ary[idx] == buf[idx]).all()
expect = ary[idx]
got = cp.array(buf[idx])
assert (expect == got).all()


@pytest.mark.parametrize(
Expand Down
23 changes: 21 additions & 2 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pyarrow as pa
import pytest
from numba import cuda
from packaging import version

import cudf
from cudf.core._compat import (
Expand Down Expand Up @@ -2021,8 +2022,26 @@ def gdf(pdf):
"y": [np.nan, np.nan, np.nan],
"z": [np.nan, np.nan, np.nan],
},
{"x": [], "y": [], "z": []},
{"x": []},
pytest.param(
{"x": [], "y": [], "z": []},
marks=pytest.mark.xfail(
condition=version.parse("11")
<= version.parse(cupy.__version__)
< version.parse("11.1"),
reason="Zero-sized array passed to cupy reduction, "
"https://github.com/cupy/cupy/issues/6937",
),
),
pytest.param(
{"x": []},
marks=pytest.mark.xfail(
condition=version.parse("11")
<= version.parse(cupy.__version__)
< version.parse("11.1"),
reason="Zero-sized array passed to cupy reduction, "
"https://github.com/cupy/cupy/issues/6937",
),
),
],
)
@pytest.mark.parametrize("axis", [0, 1])
Expand Down
45 changes: 36 additions & 9 deletions python/cudf/cudf/tests/test_dlpack.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

import itertools
from contextlib import ExitStack as does_not_raise

import cupy
import numpy as np
import pytest
from packaging import version

import cudf
from cudf.testing._utils import assert_eq
Expand All @@ -19,14 +20,21 @@
params_2d = itertools.product(ncols, nelems, dtype, nulls)


if version.parse(cupy.__version__) < version.parse("10"):
# fromDlpack deprecated in cupy version 10, replaced by from_dlpack
cupy_from_dlpack = cupy.fromDlpack
else:
cupy_from_dlpack = cupy.from_dlpack


def data_size_expectation_builder(data, nan_null_param=False):
if nan_null_param and np.isnan(data).any():
return pytest.raises((ValueError,))

if data.size > 0:
return does_not_raise()
else:
if len(data.shape) == 2 and data.size == 0:
return pytest.raises((ValueError, IndexError))
else:
return does_not_raise()


@pytest.fixture(params=params_1d)
Expand Down Expand Up @@ -107,7 +115,7 @@ def test_to_dlpack_cupy_1d(data_1d):
cudf_host_array = gs.to_numpy(na_value=np.nan)
dlt = gs.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get()

assert_eq(cudf_host_array, cupy_host_array)
Expand All @@ -121,7 +129,7 @@ def test_to_dlpack_cupy_2d(data_2d):
cudf_host_array = np.array(gdf.to_pandas()).flatten()
dlt = gdf.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get().flatten()

assert_eq(cudf_host_array, cupy_host_array)
Expand Down Expand Up @@ -157,7 +165,7 @@ def test_to_dlpack_cupy_2d_null(data_2d):
cudf_host_array = np.array(gdf.to_pandas()).flatten()
dlt = gdf.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get().flatten()

assert_eq(cudf_host_array, cupy_host_array)
Expand All @@ -171,7 +179,7 @@ def test_to_dlpack_cupy_1d_null(data_1d):
cudf_host_array = gs.to_numpy(na_value=np.nan)
dlt = gs.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get()

assert_eq(cudf_host_array, cupy_host_array)
Expand All @@ -183,7 +191,26 @@ def test_to_dlpack_mixed_dtypes():
cudf_host_array = df.to_numpy()
dlt = df.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get()

assert_eq(cudf_host_array, cupy_host_array)


@pytest.mark.parametrize(
"shape",
[
(0, 3),
pytest.param(
(3, 0),
marks=pytest.mark.xfail(
reason="Index information not available via from_dlpack"
),
),
(0, 0),
],
)
def test_from_dlpack_zero_sizes(shape):
arr = cupy.empty(shape, dtype=float)
df = cudf.io.dlpack.from_dlpack(arr.__dlpack__())
assert_eq(df, cudf.DataFrame(arr))
2 changes: 1 addition & 1 deletion python/cudf/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
install_requires.append(
"cupy-cuda"
+ get_cuda_version_from_header(cuda_include_dir)
+ ">=9.5.0,<11.0.0a0"
+ ">=9.5.0,<12.0.0a0"
)


Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
install_requires.append(
"cupy-cuda"
+ get_cuda_version_from_header(cuda_include_dir)
+ ">=9.5.0,<11.0.0a0"
+ ">=9.5.0,<12.0.0a0"
)


Expand Down

0 comments on commit bc7109e

Please sign in to comment.