Handle some zero-sized corner cases in dlpack interop #11449

Merged Sep 6, 2022 (22 commits)

Commits
305ca9b
Remove use of deprecated cupy.fromDlPack
wence- Aug 2, 2022
d71e561
dlpack: Handle corner case of size zero arrays
wence- Aug 1, 2022
1fb41f2
Remove unnecessary comment
wence- Aug 18, 2022
77ce0bb
dlpack: Report zero strides for empty 2D tensors
wence- Aug 18, 2022
e085b3a
DO NOT MERGE: allow cupy>=11
wence- Aug 18, 2022
bab34c8
DO NOT MERGE: test cupy 11 in build environment
wence- Aug 19, 2022
52a6106
Update meta.yaml
galipremsagar Aug 19, 2022
cbe1b48
Update setup.py
galipremsagar Aug 19, 2022
e63870a
Update setup.py
galipremsagar Aug 19, 2022
933548d
Update conda/environments/cudf_dev_cuda11.5.yml
galipremsagar Aug 19, 2022
6c51839
Update build.sh
galipremsagar Aug 19, 2022
8034704
Update build.sh
galipremsagar Aug 19, 2022
1648140
dlpack/test: Zero-sized tensors have strides = 0
wence- Aug 22, 2022
74a0b6a
Merge remote-tracking branch 'origin/branch-22.10' into wence/fix/dlp…
wence- Aug 22, 2022
2200b3a
Update buffer interop tests for cupy 11
wence- Aug 22, 2022
ec60ae9
xfail empty dataframe reduction on cupy 11.0
wence- Aug 23, 2022
5c0d698
update multiindex cupy docstring for cupy11
quasiben Aug 23, 2022
58152a5
Revert changes to CI build scripts
wence- Aug 25, 2022
4568162
Handle both cupy 10 and 11 in multiindex doctest
wence- Aug 26, 2022
0879cf2
Backwards-compat for cupy<10 in tests of dlpack interop
wence- Sep 1, 2022
48937ba
rename from_dlpack alias to cupy_from_dlpack
wence- Sep 1, 2022
e24dc97
Merge remote-tracking branch 'origin/branch-22.10' into wence/fix/dlp…
wence- Sep 1, 2022
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.5.yml
@@ -12,7 +12,7 @@ dependencies:
- cxx-compiler
- clang=11.1.0
- clang-tools=11.1.0
- cupy>=9.5.0,<11.0.0a0
- cupy>=9.5.0,<12.0.0a0
- rmm=22.10.*
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
@@ -49,7 +49,7 @@ requirements:
- python
- typing_extensions
- pandas >=1.0,<1.5.0dev0
- cupy >=9.5.0,<11.0.0a0
- cupy >=9.5.0,<12.0.0a0
- numba >=0.54
- numpy
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
28 changes: 16 additions & 12 deletions cpp/src/interop/dlpack.cpp
@@ -152,18 +152,22 @@ std::unique_ptr<table> from_dlpack(DLManagedTensor const* managed_tensor,

// We only support 1D and 2D tensors with some restrictions on layout
if (tensor.ndim == 1) {
// 1D tensors must have dense layout (strides == nullptr <=> dense row-major)
CUDF_EXPECTS(nullptr == tensor.strides || tensor.strides[0] == 1,
// 1D tensors must have dense layout (strides == nullptr <=> dense layout), or have shape (0,)
CUDF_EXPECTS(nullptr == tensor.strides || tensor.strides[0] == 1 || tensor.shape[0] == 0,
"from_dlpack of 1D DLTensor only for unit-stride data");
} else if (tensor.ndim == 2) {
// 2D tensors must have column-major layout and the fastest dimension must have dense layout
CUDF_EXPECTS((
// 1D tensor reshaped into (N, 1) is fine
tensor.shape[1] == 1 && (nullptr == tensor.strides || tensor.strides[0] == 1))
// General case
|| (nullptr != tensor.strides && tensor.strides[0] == 1 &&
tensor.strides[1] >= tensor.shape[0]),
"from_dlpack of 2D DLTensor only for column-major unit-stride data");
CUDF_EXPECTS(
// Empty array is fine. If ncols == 0 then we get an empty dataframe
// irrespective of nrows, which is slightly different behaviour from
// cudf.DataFrame(np.empty((3, 0))) because there's no way to communicate
// the index information out with a table view if no columns exist.
(tensor.shape[0] == 0 || tensor.shape[1] == 0)
// (N, 1) is fine as long as the 1D array has dense layout
|| (tensor.shape[1] == 1 && (nullptr == tensor.strides || tensor.strides[0] == 1))
// Column major is fine as long as the fastest dimension has dense layout
|| (nullptr != tensor.strides && tensor.strides[0] == 1 &&
tensor.strides[1] >= tensor.shape[0]),
"from_dlpack of 2D DLTensor only for column-major unit-stride data");
} else {
CUDF_FAIL("DLTensor must be 1D or 2D");
}
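A minimal Python-level sketch of what the relaxed check above accepts and still rejects, assuming this branch of cudf plus cupy >= 10 for the __dlpack__ protocol (cudf.from_dlpack is the public wrapper over this entry point):

import cupy
import cudf

# Zero-sized 2D tensors now pass validation regardless of their strides.
empty = cupy.empty((0, 3), dtype="float64")
print(cudf.from_dlpack(empty.__dlpack__()).shape)   # (0, 3)

# An (N, 1) column with dense layout was already accepted and still is.
col = cupy.arange(4, dtype="float64").reshape(4, 1)
print(cudf.from_dlpack(col.__dlpack__()).shape)     # (4, 1)

# Non-empty row-major data with more than one column is still rejected by
# the CUDF_EXPECTS above ("only for column-major unit-stride data"); the
# exact Python exception type depends on how the wrapper surfaces it.
row_major = cupy.arange(6, dtype="float64").reshape(2, 3)
try:
    cudf.from_dlpack(row_major.__dlpack__())
except Exception as err:
    print(err)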
@@ -217,7 +221,7 @@ DLManagedTensor* to_dlpack(table_view const& input,
{
auto const num_rows = input.num_rows();
auto const num_cols = input.num_columns();
if (num_rows == 0) { return nullptr; }
if (num_rows == 0 && num_cols == 0) { return nullptr; }

// Ensure that type is convertible to DLDataType
data_type const type = input.column(0).type();
@@ -245,7 +249,7 @@
if (tensor.ndim > 1) {
tensor.shape[1] = num_cols;
tensor.strides = context->strides;
tensor.strides[0] = 1;
tensor.strides[0] = num_rows > 1 ? 1 : 0;
tensor.strides[1] = num_rows;
}

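From the Python side, a sketch of the effect of the two hunks above (again assuming cupy >= 10): a table that has columns but no rows now exports a valid (0, ncols) tensor whose strides are reported as zero, and only a table with no columns at all produces no tensor.

import cudf
import cupy

gdf = cudf.DataFrame({"a": cudf.Series([], dtype="int32"),
                      "b": cudf.Series([], dtype="int32")})
capsule = gdf.to_dlpack()          # would previously hit the 0-row early return
arr = cupy.from_dlpack(capsule)    # cupy.fromDlpack on cupy < 10
print(arr.shape)                   # (0, 2)

# A completely column-less table still has no DLPack representation;
# libcudf returns no tensor for it (see EmptyTableToDlpack below).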
16 changes: 14 additions & 2 deletions cpp/tests/interop/dlpack_test.cpp
@@ -67,6 +67,7 @@ class DLPackUntypedTests : public BaseFixture {
TEST_F(DLPackUntypedTests, EmptyTableToDlpack)
{
cudf::table_view empty(std::vector<cudf::column_view>{});
// No type information to construct a correct empty dlpack object
EXPECT_EQ(nullptr, cudf::to_dlpack(empty));
}

@@ -75,7 +76,18 @@ TEST_F(DLPackUntypedTests, EmptyColsToDlpack)
fixed_width_column_wrapper<int32_t> col1({});
fixed_width_column_wrapper<int32_t> col2({});
cudf::table_view input({col1, col2});
EXPECT_EQ(nullptr, cudf::to_dlpack(input));
unique_managed_tensor tensor(cudf::to_dlpack(input));
validate_dtype<int32_t>(tensor->dl_tensor.dtype);
EXPECT_NE(nullptr, tensor);
EXPECT_EQ(nullptr, tensor->dl_tensor.data);
EXPECT_EQ(2, tensor->dl_tensor.ndim);
EXPECT_EQ(0, tensor->dl_tensor.strides[0]);
EXPECT_EQ(0, tensor->dl_tensor.strides[1]);
EXPECT_EQ(0, tensor->dl_tensor.shape[0]);
EXPECT_EQ(2, tensor->dl_tensor.shape[1]);
EXPECT_EQ(kDLCUDA, tensor->dl_tensor.device.device_type);
auto result = cudf::from_dlpack(tensor.get());
CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view());
}

TEST_F(DLPackUntypedTests, NullTensorFromDlpack)
@@ -481,6 +493,6 @@ TYPED_TEST(DLPackNumericTests, FromDlpackEmpty1D)
cudf::table_view input(std::vector<cudf::column_view>{});
unique_managed_tensor tensor(cudf::to_dlpack(input));

// Verify that from_dlpack(to_dlpack(input)) == input
EXPECT_EQ(nullptr, tensor.get());
EXPECT_THROW(cudf::from_dlpack(tensor.get()), cudf::logic_error);
}
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/multiindex.py
@@ -1131,7 +1131,7 @@ def values(self):
[4, 2],
[5, 1]])
>>> type(midx.values)
<class 'cupy._core.core.ndarray'>
<class 'cupy...ndarray'>
"""
return self.to_frame(index=False).values

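The docstring tweak above leans on doctest's ellipsis matching, so the example passes whether cupy spells the type as cupy._core.core.ndarray (cupy < 11) or cupy.ndarray (cupy >= 11). A standalone illustration, assuming the docstring runner enables the ELLIPSIS option:

import doctest

checker = doctest.OutputChecker()
want = "<class 'cupy...ndarray'>\n"   # what the docstring now shows

# Both spellings of the repr satisfy the ellipsis pattern.
for got in ("<class 'cupy._core.core.ndarray'>\n", "<class 'cupy.ndarray'>\n"):
    print(checker.check_output(want, got, doctest.ELLIPSIS))   # True, True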
3 changes: 0 additions & 3 deletions python/cudf/cudf/io/dlpack.py
@@ -68,9 +68,6 @@ def to_dlpack(cudf_obj):
cuDF to_dlpack() produces column-major (Fortran order) output. If the
output tensor needs to be row major, transpose the output of this function.
"""
if len(cudf_obj) == 0:
raise ValueError("Cannot create DLPack tensor of 0 size")

if isinstance(cudf_obj, (cudf.DataFrame, cudf.Series, cudf.BaseIndex)):
gdf = cudf_obj
elif isinstance(cudf_obj, ColumnBase):
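With the early ValueError gone, zero-length (but typed) objects flow through to libcudf, which can now describe them. A rough sketch of the 1D case under the same cupy >= 10 assumption:

import cudf
import cupy

gs = cudf.Series([], dtype="float64")
capsule = cudf.io.dlpack.to_dlpack(gs)   # used to raise "Cannot create DLPack tensor of 0 size"
arr = cupy.from_dlpack(capsule)
print(arr.shape, arr.dtype)              # (0,) float64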
9 changes: 6 additions & 3 deletions python/cudf/cudf/tests/test_buffer.py
@@ -42,8 +42,9 @@ def test_buffer_from_cuda_iface_contiguous(data):
def test_buffer_from_cuda_iface_dtype(data, dtype):
data = data.astype(dtype)
buf = as_device_buffer_like(data)
ary = cp.array(buf).flatten().view("uint8")
assert (ary == buf).all()
got = cp.array(buf).reshape(-1).view("uint8")
expect = data.reshape(-1).view("uint8")
assert (expect == got).all()


@pytest.mark.parametrize("creator", [Buffer, as_device_buffer_like])
@@ -83,7 +84,9 @@ def test_buffer_repr(size, expect):
def test_buffer_slice(idx):
ary = cp.arange(arr_len, dtype="uint8")
buf = as_device_buffer_like(ary)
assert (ary[idx] == buf[idx]).all()
expect = ary[idx]
got = cp.array(buf[idx])
assert (expect == got).all()


@pytest.mark.parametrize(
23 changes: 21 additions & 2 deletions python/cudf/cudf/tests/test_dataframe.py
@@ -18,6 +18,7 @@
import pyarrow as pa
import pytest
from numba import cuda
from packaging import version

import cudf
from cudf.core._compat import (
@@ -2021,8 +2022,26 @@ def gdf(pdf):
"y": [np.nan, np.nan, np.nan],
"z": [np.nan, np.nan, np.nan],
},
{"x": [], "y": [], "z": []},
{"x": []},
pytest.param(
{"x": [], "y": [], "z": []},
marks=pytest.mark.xfail(
condition=version.parse("11")
<= version.parse(cupy.__version__)
< version.parse("11.1"),
reason="Zero-sized array passed to cupy reduction, "
"https://github.com/cupy/cupy/issues/6937",
),
),
pytest.param(
{"x": []},
marks=pytest.mark.xfail(
condition=version.parse("11")
<= version.parse(cupy.__version__)
< version.parse("11.1"),
reason="Zero-sized array passed to cupy reduction, "
"https://github.com/cupy/cupy/issues/6937",
),
),
],
)
@pytest.mark.parametrize("axis", [0, 1])
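A small sketch of how that xfail gate evaluates (packaging is imported at the top of this module): parsed comparison makes the bare "11" bound behave like 11.0, so only the affected 11.0.x releases are marked, while 11.1 and later are left to run normally.

from packaging import version

def affected(cupy_version: str) -> bool:
    # Mirrors the condition used in the marks above: 11 <= version < 11.1
    return (version.parse("11") <= version.parse(cupy_version)
            < version.parse("11.1"))

for v in ("10.6.0", "11.0.0", "11.0.2", "11.1.0"):
    print(v, affected(v))   # False, True, True, False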
45 changes: 36 additions & 9 deletions python/cudf/cudf/tests/test_dlpack.py
@@ -1,11 +1,12 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

import itertools
from contextlib import ExitStack as does_not_raise

import cupy
import numpy as np
import pytest
from packaging import version

import cudf
from cudf.testing._utils import assert_eq
@@ -19,14 +20,21 @@
params_2d = itertools.product(ncols, nelems, dtype, nulls)


if version.parse(cupy.__version__) < version.parse("10"):
# fromDlpack deprecated in cupy version 10, replaced by from_dlpack
cupy_from_dlpack = cupy.fromDlpack
else:
cupy_from_dlpack = cupy.from_dlpack


def data_size_expectation_builder(data, nan_null_param=False):
if nan_null_param and np.isnan(data).any():
return pytest.raises((ValueError,))

if data.size > 0:
return does_not_raise()
else:
if len(data.shape) == 2 and data.size == 0:
return pytest.raises((ValueError, IndexError))
else:
return does_not_raise()


@pytest.fixture(params=params_1d)
@@ -107,7 +115,7 @@ def test_to_dlpack_cupy_1d(data_1d):
cudf_host_array = gs.to_numpy(na_value=np.nan)
dlt = gs.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get()

assert_eq(cudf_host_array, cupy_host_array)
@@ -121,7 +129,7 @@ def test_to_dlpack_cupy_2d(data_2d):
cudf_host_array = np.array(gdf.to_pandas()).flatten()
dlt = gdf.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get().flatten()

assert_eq(cudf_host_array, cupy_host_array)
@@ -157,7 +165,7 @@ def test_to_dlpack_cupy_2d_null(data_2d):
cudf_host_array = np.array(gdf.to_pandas()).flatten()
dlt = gdf.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get().flatten()

assert_eq(cudf_host_array, cupy_host_array)
@@ -171,7 +179,7 @@ def test_to_dlpack_cupy_1d_null(data_1d):
cudf_host_array = gs.to_numpy(na_value=np.nan)
dlt = gs.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get()

assert_eq(cudf_host_array, cupy_host_array)
@@ -183,7 +191,26 @@ def test_to_dlpack_mixed_dtypes():
cudf_host_array = df.to_numpy()
dlt = df.to_dlpack()

cupy_array = cupy.fromDlpack(dlt)
cupy_array = cupy_from_dlpack(dlt)
cupy_host_array = cupy_array.get()

assert_eq(cudf_host_array, cupy_host_array)


@pytest.mark.parametrize(
"shape",
[
(0, 3),
pytest.param(
(3, 0),
marks=pytest.mark.xfail(
reason="Index information not available via from_dlpack"
),
),
(0, 0),
],
)
def test_from_dlpack_zero_sizes(shape):
arr = cupy.empty(shape, dtype=float)
df = cudf.io.dlpack.from_dlpack(arr.__dlpack__())
assert_eq(df, cudf.DataFrame(arr))
2 changes: 1 addition & 1 deletion python/cudf/setup.py
@@ -81,7 +81,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
install_requires.append(
"cupy-cuda"
+ get_cuda_version_from_header(cuda_include_dir)
+ ">=9.5.0,<11.0.0a0"
+ ">=9.5.0,<12.0.0a0"
)


2 changes: 1 addition & 1 deletion python/dask_cudf/setup.py
@@ -68,7 +68,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
install_requires.append(
"cupy-cuda"
+ get_cuda_version_from_header(cuda_include_dir)
+ ">=9.5.0,<11.0.0a0"
+ ">=9.5.0,<12.0.0a0"
)


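For reference, a rough illustration (not the real setup.py logic) of the requirement string the relaxed pin produces in both setup.py files; the "115" suffix is a hypothetical stand-in for whatever get_cuda_version_from_header reports in a CUDA 11.5 build:

# Hypothetical stand-in for get_cuda_version_from_header(cuda_include_dir)
cuda_suffix = "115"

requirement = "cupy-cuda" + cuda_suffix + ">=9.5.0,<12.0.0a0"
print(requirement)   # cupy-cuda115>=9.5.0,<12.0.0a0
# The <12.0.0a0 upper bound now admits cupy 11.x while still excluding
# cupy 12 pre-releases.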