diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index bdc853f8a974..a697194edba9 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -12,7 +12,7 @@ dependencies: - cxx-compiler - clang=11.1.0 - clang-tools=11.1.0 - - cupy>=9.5.0,<11.0.0a0 + - cupy>=9.5.0,<12.0.0a0 - rmm=22.10.* - cmake>=3.20.1,!=3.23.0 - cmake_setuptools>=0.1.3 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 6a7554b99aa8..9e62c4fa6116 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -49,7 +49,7 @@ requirements: - python - typing_extensions - pandas >=1.0,<1.5.0dev0 - - cupy >=9.5.0,<11.0.0a0 + - cupy >=9.5.0,<12.0.0a0 - numba >=0.54 - numpy - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 256d7bd17953..7b300924dd57 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -152,18 +152,22 @@ std::unique_ptr from_dlpack(DLManagedTensor const* managed_tensor, // We only support 1D and 2D tensors with some restrictions on layout if (tensor.ndim == 1) { - // 1D tensors must have dense layout (strides == nullptr <=> dense row-major) - CUDF_EXPECTS(nullptr == tensor.strides || tensor.strides[0] == 1, + // 1D tensors must have dense layout (strides == nullptr <=> dense layout), or have shape (0,) + CUDF_EXPECTS(nullptr == tensor.strides || tensor.strides[0] == 1 || tensor.shape[0] == 0, "from_dlpack of 1D DLTensor only for unit-stride data"); } else if (tensor.ndim == 2) { - // 2D tensors must have column-major layout and the fastest dimension must have dense layout - CUDF_EXPECTS(( - // 1D tensor reshaped into (N, 1) is fine - tensor.shape[1] == 1 && (nullptr == tensor.strides || tensor.strides[0] == 1)) - // General case - || (nullptr != tensor.strides && tensor.strides[0] == 1 && - tensor.strides[1] >= tensor.shape[0]), - "from_dlpack of 2D DLTensor only for column-major unit-stride data"); + CUDF_EXPECTS( + // Empty array is fine. If ncols == 0 then we get an empty dataframe + // irrespective of nrows, which is slightly different behaviour from + // cudf.DataFrame(np.empty((3, 0))) because there's no way to communicate + // the index information out with a table view if no columns exist. + (tensor.shape[0] == 0 || tensor.shape[1] == 0) + // (N, 1) is fine as long as the 1D array has dense layout + || (tensor.shape[1] == 1 && (nullptr == tensor.strides || tensor.strides[0] == 1)) + // Column major is fine as long as the fastest dimension has dense layout + || (nullptr != tensor.strides && tensor.strides[0] == 1 && + tensor.strides[1] >= tensor.shape[0]), + "from_dlpack of 2D DLTensor only for column-major unit-stride data"); } else { CUDF_FAIL("DLTensor must be 1D or 2D"); } @@ -217,7 +221,7 @@ DLManagedTensor* to_dlpack(table_view const& input, { auto const num_rows = input.num_rows(); auto const num_cols = input.num_columns(); - if (num_rows == 0) { return nullptr; } + if (num_rows == 0 && num_cols == 0) { return nullptr; } // Ensure that type is convertible to DLDataType data_type const type = input.column(0).type(); @@ -245,7 +249,7 @@ DLManagedTensor* to_dlpack(table_view const& input, if (tensor.ndim > 1) { tensor.shape[1] = num_cols; tensor.strides = context->strides; - tensor.strides[0] = 1; + tensor.strides[0] = num_rows > 1 ? 1 : 0; tensor.strides[1] = num_rows; } diff --git a/cpp/tests/interop/dlpack_test.cpp b/cpp/tests/interop/dlpack_test.cpp index a722f66951f3..da9f80cf3d7a 100644 --- a/cpp/tests/interop/dlpack_test.cpp +++ b/cpp/tests/interop/dlpack_test.cpp @@ -67,6 +67,7 @@ class DLPackUntypedTests : public BaseFixture { TEST_F(DLPackUntypedTests, EmptyTableToDlpack) { cudf::table_view empty(std::vector{}); + // No type information to construct a correct empty dlpack object EXPECT_EQ(nullptr, cudf::to_dlpack(empty)); } @@ -75,7 +76,18 @@ TEST_F(DLPackUntypedTests, EmptyColsToDlpack) fixed_width_column_wrapper col1({}); fixed_width_column_wrapper col2({}); cudf::table_view input({col1, col2}); - EXPECT_EQ(nullptr, cudf::to_dlpack(input)); + unique_managed_tensor tensor(cudf::to_dlpack(input)); + validate_dtype(tensor->dl_tensor.dtype); + EXPECT_NE(nullptr, tensor); + EXPECT_EQ(nullptr, tensor->dl_tensor.data); + EXPECT_EQ(2, tensor->dl_tensor.ndim); + EXPECT_EQ(0, tensor->dl_tensor.strides[0]); + EXPECT_EQ(0, tensor->dl_tensor.strides[1]); + EXPECT_EQ(0, tensor->dl_tensor.shape[0]); + EXPECT_EQ(2, tensor->dl_tensor.shape[1]); + EXPECT_EQ(kDLCUDA, tensor->dl_tensor.device.device_type); + auto result = cudf::from_dlpack(tensor.get()); + CUDF_TEST_EXPECT_TABLES_EQUAL(input, result->view()); } TEST_F(DLPackUntypedTests, NullTensorFromDlpack) @@ -481,6 +493,6 @@ TYPED_TEST(DLPackNumericTests, FromDlpackEmpty1D) cudf::table_view input(std::vector{}); unique_managed_tensor tensor(cudf::to_dlpack(input)); - // Verify that from_dlpack(to_dlpack(input)) == input + EXPECT_EQ(nullptr, tensor.get()); EXPECT_THROW(cudf::from_dlpack(tensor.get()), cudf::logic_error); } diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 10b220b3552c..3fb4238a8b62 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1131,7 +1131,7 @@ def values(self): [4, 2], [5, 1]]) >>> type(midx.values) - + """ return self.to_frame(index=False).values diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py index 644643db83c3..e1950c9f2503 100644 --- a/python/cudf/cudf/io/dlpack.py +++ b/python/cudf/cudf/io/dlpack.py @@ -68,9 +68,6 @@ def to_dlpack(cudf_obj): cuDF to_dlpack() produces column-major (Fortran order) output. If the output tensor needs to be row major, transpose the output of this function. """ - if len(cudf_obj) == 0: - raise ValueError("Cannot create DLPack tensor of 0 size") - if isinstance(cudf_obj, (cudf.DataFrame, cudf.Series, cudf.BaseIndex)): gdf = cudf_obj elif isinstance(cudf_obj, ColumnBase): diff --git a/python/cudf/cudf/tests/test_buffer.py b/python/cudf/cudf/tests/test_buffer.py index 16ba18581ed3..eaa615a28391 100644 --- a/python/cudf/cudf/tests/test_buffer.py +++ b/python/cudf/cudf/tests/test_buffer.py @@ -42,8 +42,9 @@ def test_buffer_from_cuda_iface_contiguous(data): def test_buffer_from_cuda_iface_dtype(data, dtype): data = data.astype(dtype) buf = as_device_buffer_like(data) - ary = cp.array(buf).flatten().view("uint8") - assert (ary == buf).all() + got = cp.array(buf).reshape(-1).view("uint8") + expect = data.reshape(-1).view("uint8") + assert (expect == got).all() @pytest.mark.parametrize("creator", [Buffer, as_device_buffer_like]) @@ -83,7 +84,9 @@ def test_buffer_repr(size, expect): def test_buffer_slice(idx): ary = cp.arange(arr_len, dtype="uint8") buf = as_device_buffer_like(ary) - assert (ary[idx] == buf[idx]).all() + expect = ary[idx] + got = cp.array(buf[idx]) + assert (expect == got).all() @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 3bea55875711..af719958c1af 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -18,6 +18,7 @@ import pyarrow as pa import pytest from numba import cuda +from packaging import version import cudf from cudf.core._compat import ( @@ -2021,8 +2022,26 @@ def gdf(pdf): "y": [np.nan, np.nan, np.nan], "z": [np.nan, np.nan, np.nan], }, - {"x": [], "y": [], "z": []}, - {"x": []}, + pytest.param( + {"x": [], "y": [], "z": []}, + marks=pytest.mark.xfail( + condition=version.parse("11") + <= version.parse(cupy.__version__) + < version.parse("11.1"), + reason="Zero-sized array passed to cupy reduction, " + "https://github.com/cupy/cupy/issues/6937", + ), + ), + pytest.param( + {"x": []}, + marks=pytest.mark.xfail( + condition=version.parse("11") + <= version.parse(cupy.__version__) + < version.parse("11.1"), + reason="Zero-sized array passed to cupy reduction, " + "https://github.com/cupy/cupy/issues/6937", + ), + ), ], ) @pytest.mark.parametrize("axis", [0, 1]) diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index b686d4d39302..6e34817c4fd6 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import itertools from contextlib import ExitStack as does_not_raise @@ -6,6 +6,7 @@ import cupy import numpy as np import pytest +from packaging import version import cudf from cudf.testing._utils import assert_eq @@ -19,14 +20,21 @@ params_2d = itertools.product(ncols, nelems, dtype, nulls) +if version.parse(cupy.__version__) < version.parse("10"): + # fromDlpack deprecated in cupy version 10, replaced by from_dlpack + cupy_from_dlpack = cupy.fromDlpack +else: + cupy_from_dlpack = cupy.from_dlpack + + def data_size_expectation_builder(data, nan_null_param=False): if nan_null_param and np.isnan(data).any(): return pytest.raises((ValueError,)) - if data.size > 0: - return does_not_raise() - else: + if len(data.shape) == 2 and data.size == 0: return pytest.raises((ValueError, IndexError)) + else: + return does_not_raise() @pytest.fixture(params=params_1d) @@ -107,7 +115,7 @@ def test_to_dlpack_cupy_1d(data_1d): cudf_host_array = gs.to_numpy(na_value=np.nan) dlt = gs.to_dlpack() - cupy_array = cupy.fromDlpack(dlt) + cupy_array = cupy_from_dlpack(dlt) cupy_host_array = cupy_array.get() assert_eq(cudf_host_array, cupy_host_array) @@ -121,7 +129,7 @@ def test_to_dlpack_cupy_2d(data_2d): cudf_host_array = np.array(gdf.to_pandas()).flatten() dlt = gdf.to_dlpack() - cupy_array = cupy.fromDlpack(dlt) + cupy_array = cupy_from_dlpack(dlt) cupy_host_array = cupy_array.get().flatten() assert_eq(cudf_host_array, cupy_host_array) @@ -157,7 +165,7 @@ def test_to_dlpack_cupy_2d_null(data_2d): cudf_host_array = np.array(gdf.to_pandas()).flatten() dlt = gdf.to_dlpack() - cupy_array = cupy.fromDlpack(dlt) + cupy_array = cupy_from_dlpack(dlt) cupy_host_array = cupy_array.get().flatten() assert_eq(cudf_host_array, cupy_host_array) @@ -171,7 +179,7 @@ def test_to_dlpack_cupy_1d_null(data_1d): cudf_host_array = gs.to_numpy(na_value=np.nan) dlt = gs.to_dlpack() - cupy_array = cupy.fromDlpack(dlt) + cupy_array = cupy_from_dlpack(dlt) cupy_host_array = cupy_array.get() assert_eq(cudf_host_array, cupy_host_array) @@ -183,7 +191,26 @@ def test_to_dlpack_mixed_dtypes(): cudf_host_array = df.to_numpy() dlt = df.to_dlpack() - cupy_array = cupy.fromDlpack(dlt) + cupy_array = cupy_from_dlpack(dlt) cupy_host_array = cupy_array.get() assert_eq(cudf_host_array, cupy_host_array) + + +@pytest.mark.parametrize( + "shape", + [ + (0, 3), + pytest.param( + (3, 0), + marks=pytest.mark.xfail( + reason="Index information not available via from_dlpack" + ), + ), + (0, 0), + ], +) +def test_from_dlpack_zero_sizes(shape): + arr = cupy.empty(shape, dtype=float) + df = cudf.io.dlpack.from_dlpack(arr.__dlpack__()) + assert_eq(df, cudf.DataFrame(arr)) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 2ca132e37cb4..d790a123671f 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -81,7 +81,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): install_requires.append( "cupy-cuda" + get_cuda_version_from_header(cuda_include_dir) - + ">=9.5.0,<11.0.0a0" + + ">=9.5.0,<12.0.0a0" ) diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py index f86cee2454be..3c49dad6a177 100644 --- a/python/dask_cudf/setup.py +++ b/python/dask_cudf/setup.py @@ -68,7 +68,7 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""): install_requires.append( "cupy-cuda" + get_cuda_version_from_header(cuda_include_dir) - + ">=9.5.0,<11.0.0a0" + + ">=9.5.0,<12.0.0a0" )