From f780add94732cfd92c6f5e282a8fe0411a0a4a49 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Fri, 3 Feb 2023 02:09:21 -0600 Subject: [PATCH] Fixes devices vector alloc to fix seg fault, removes unused RAFT code in PLC, re-enables full CI testing (#3167) closes #3124 * Adds check to avoid allocating and copying zero-length device vectors. This prevents the seg fault shown below. * Removes the special case to ignore seg faults in CI scripts * Adds a test to reproduce seg fault locally (see output below). This PR addresses the problem shown below: ``` ================================= test session starts ================================= platform linux -- Python 3.8.15, pytest-7.2.0, pluggy-1.0.0 -- /opt/conda/envs/test/bin/python3.8 cachedir: .pytest_cache rapids_pytest_benchmark: 0.0.14 benchmark: 3.2.3 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000) rootdir: /cugraph/python/pylibcugraph, configfile: pytest.ini plugins: cov-4.0.0, rapids-pytest-benchmark-0.0.14, benchmark-3.2.3 collected 9 items / 8 deselected / 1 selected python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py::test_SGGraph_create_from_cudf get edgelist...edgelist = src dst wgt 0 0 1 0.0 1 1 2 0.1 2 2 4 0.2 done create Graph...done created SGGraph plc_graph= PASSED =========================== 1 passed, 8 deselected in 1.69s =========================== Segmentation fault (core dumped) ``` @cjnolet found a work-around for us, so this should pass CI and can be merged after https://github.com/rapidsai/raft/pull/1224 Authors: - Rick Ratzel (https://github.com/rlratzel) - Chuck Hastings (https://github.com/ChuckHastings) - Corey J. Nolet (https://github.com/cjnolet) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/3167 --- ci/test_notebooks.sh | 12 +-- ci/test_python.sh | 30 ++----- cpp/include/cugraph/utilities/misc_utils.cuh | 58 ++++++------ cpp/include/cugraph_c/sampling_algorithms.h | 14 ++- cpp/src/c_api/uniform_neighbor_sampling.cpp | 80 +++++++++++++---- .../community/legacy/spectral_clustering.cu | 4 +- python/pylibcugraph/CMakeLists.txt | 2 +- .../pylibcugraph/pylibcugraph/CMakeLists.txt | 1 - .../pylibcugraph/_cugraph_c/algorithms.pxd | 7 +- .../pylibcugraph/raft/__init__.py | 0 .../pylibcugraph/raft/common/CMakeLists.txt | 26 ------ .../pylibcugraph/raft/common/TODO | 6 -- .../pylibcugraph/raft/common/__init__.py | 0 .../pylibcugraph/raft/common/cuda.pxd | 22 ----- .../pylibcugraph/raft/common/cuda.pyx | 84 ----------------- .../pylibcugraph/raft/common/handle.pxd | 41 --------- .../pylibcugraph/raft/common/handle.pyx | 90 ------------------- .../pylibcugraph/tests/test_graph_sg.py | 53 ++++++++++- 18 files changed, 171 insertions(+), 359 deletions(-) delete mode 100644 python/pylibcugraph/pylibcugraph/raft/__init__.py delete mode 100644 python/pylibcugraph/pylibcugraph/raft/common/CMakeLists.txt delete mode 100644 python/pylibcugraph/pylibcugraph/raft/common/TODO delete mode 100644 python/pylibcugraph/pylibcugraph/raft/common/__init__.py delete mode 100644 python/pylibcugraph/pylibcugraph/raft/common/cuda.pxd delete mode 100644 python/pylibcugraph/pylibcugraph/raft/common/cuda.pyx delete mode 100644 python/pylibcugraph/pylibcugraph/raft/common/handle.pxd delete mode 100644 python/pylibcugraph/pylibcugraph/raft/common/handle.pyx diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index cc75a015dc6..3453627ce63 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -32,10 +32,7 @@ rapids-mamba-retry install \ NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")" NOTEBOOK_LIST="$(realpath "$(dirname "$0")/gpu/notebook_list.py")" EXITCODE=0 -# FIXME: This is temporary until a crash that occurs at cleanup is fixed. This -# allows PRs that pass tests to pass even if they crash with a Seg Fault or -# other error that results in 139. Remove this ASAP! -# trap "EXITCODE=1" ERR +trap "EXITCODE=1" ERR pushd notebooks @@ -52,13 +49,6 @@ for folder in ${TOPLEVEL_NB_FOLDERS}; do pushd "$(dirname "${nb}")" nvidia-smi ${NBTEST} "${nbBasename}" - # FIXME: This is temporary until a crash that occurs at cleanup is fixed. This - # allows PRs that pass tests to pass even if they crash with a Seg Fault or - # other error that results in 139. Remove this ASAP! - exitcode=$? - if (( (${exitcode} != 0) && (${exitcode} != 139) )); then - EXITCODE=1 - fi echo "Ran nbtest for $nb : return code was: $?, test script exit code is now: $EXITCODE" echo popd diff --git a/ci/test_python.sh b/ci/test_python.sh index 1a369b66c9b..fea55844cef 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -62,11 +62,7 @@ pytest \ tests exitcode=$? -# FIXME: This is temporary until a crash that occurs at cleanup is fixed. This -# allows PRs that pass tests to pass even if they crash with a Seg Fault or -# other error that results in 139. Remove this ASAP! -# if (( ${exitcode} != 0 )); then -if (( (${exitcode} != 0) && (${exitcode} != 139) )); then +if (( ${exitcode} != 0 )); then SUITEERROR=${exitcode} echo "FAILED: 1 or more tests in pylibcugraph" fi @@ -85,11 +81,7 @@ pytest \ tests exitcode=$? -# FIXME: This is temporary until a crash that occurs at cleanup is fixed. This -# allows PRs that pass tests to pass even if they crash with a Seg Fault or -# other error that results in 139. Remove this ASAP! -# if (( ${exitcode} != 0 )); then -if (( (${exitcode} != 0) && (${exitcode} != 139) )); then +if (( ${exitcode} != 0 )); then SUITEERROR=${exitcode} echo "FAILED: 1 or more tests in cugraph" fi @@ -105,11 +97,7 @@ pytest \ cugraph/pytest-based/bench_algos.py exitcode=$? -# FIXME: This is temporary until a crash that occurs at cleanup is fixed. This -# allows PRs that pass tests to pass even if they crash with a Seg Fault or -# other error that results in 139. Remove this ASAP! -# if (( ${exitcode} != 0 )); then -if (( (${exitcode} != 0) && (${exitcode} != 139) )); then +if (( ${exitcode} != 0 )); then SUITEERROR=${exitcode} echo "FAILED: 1 or more tests in cugraph benchmarks" fi @@ -130,11 +118,7 @@ pytest \ . exitcode=$? -# FIXME: This is temporary until a crash that occurs at cleanup is fixed. This -# allows PRs that pass tests to pass even if they crash with a Seg Fault or -# other error that results in 139. Remove this ASAP! -# if (( ${exitcode} != 0 )); then -if (( (${exitcode} != 0) && (${exitcode} != 139) )); then +if (( ${exitcode} != 0 )); then SUITEERROR=${exitcode} echo "FAILED: 1 or more tests in cugraph-pyg" fi @@ -157,11 +141,7 @@ pytest \ tests exitcode=$? -# FIXME: This is temporary until a crash that occurs at cleanup is fixed. This -# allows PRs that pass tests to pass even if they crash with a Seg Fault or -# other error that results in 139. Remove this ASAP! -# if (( ${exitcode} != 0 )); then -if (( (${exitcode} != 0) && (${exitcode} != 139) )); then +if (( ${exitcode} != 0 )); then SUITEERROR=${exitcode} echo "FAILED: 1 or more tests in cugraph-service" fi diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 2bfcbc21abe..f08a3fda6e7 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,33 +45,39 @@ std::tuple, std::vector> compute_offset_aligned_ed thrust::make_counting_iterator(size_t{1}), [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }); auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size; - rmm::device_uvector d_vertex_offsets(num_chunks - 1, handle.get_stream()); - thrust::lower_bound(handle.get_thrust_policy(), - offsets, - offsets + num_vertices + 1, - search_offset_first, - search_offset_first + d_vertex_offsets.size(), - d_vertex_offsets.begin()); - rmm::device_uvector d_edge_offsets(d_vertex_offsets.size(), handle.get_stream()); - thrust::gather(handle.get_thrust_policy(), - d_vertex_offsets.begin(), - d_vertex_offsets.end(), - offsets, - d_edge_offsets.begin()); - std::vector h_edge_offsets(num_chunks + 1, edge_t{0}); - h_edge_offsets.back() = num_edges; - raft::update_host( - h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream()); - std::vector h_vertex_offsets(num_chunks + 1, vertex_t{0}); - h_vertex_offsets.back() = num_vertices; - raft::update_host(h_vertex_offsets.data() + 1, - d_vertex_offsets.data(), - d_vertex_offsets.size(), - handle.get_stream()); - handle.sync_stream(); + if (num_chunks > 1) { + rmm::device_uvector d_vertex_offsets(num_chunks - 1, handle.get_stream()); + thrust::lower_bound(handle.get_thrust_policy(), + offsets, + offsets + num_vertices + 1, + search_offset_first, + search_offset_first + d_vertex_offsets.size(), + d_vertex_offsets.begin()); + rmm::device_uvector d_edge_offsets(d_vertex_offsets.size(), handle.get_stream()); + thrust::gather(handle.get_thrust_policy(), + d_vertex_offsets.begin(), + d_vertex_offsets.end(), + offsets, + d_edge_offsets.begin()); + std::vector h_edge_offsets(num_chunks + 1, edge_t{0}); + h_edge_offsets.back() = num_edges; + raft::update_host( + h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream()); + std::vector h_vertex_offsets(num_chunks + 1, vertex_t{0}); + h_vertex_offsets.back() = num_vertices; + raft::update_host(h_vertex_offsets.data() + 1, + d_vertex_offsets.data(), + d_vertex_offsets.size(), + handle.get_stream()); - return std::make_tuple(h_vertex_offsets, h_edge_offsets); + handle.sync_stream(); + + return std::make_tuple(h_vertex_offsets, h_edge_offsets); + } else { + return std::make_tuple(std::vector{{0, num_vertices}}, + std::vector{{0, num_edges}}); + } } template diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index d33f0019a61..7a9bd93b079 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -354,8 +354,11 @@ void cugraph_sample_result_free(cugraph_sample_result_t* result); * @param [in] handle Handle for accessing resources * @param [in] srcs Device array view to populate srcs * @param [in] dsts Device array view to populate dsts - * @param [in] weights Device array view to populate weights - * @param [in] counts Device array view to populate counts + * @param [in] edge_id Device array view to populate edge_id (can be NULL) + * @param [in] edge_type Device array view to populate edge_type (can be NULL) + * @param [in] wgt Device array view to populate wgt (can be NULL) + * @param [in] hop Device array view to populate hop + * @param [in] label Device array view to populate label (can be NULL) * @param [out] result Pointer to the location to store the * cugraph_sample_result_t* * @param [out] error Pointer to an error object storing details of @@ -367,8 +370,11 @@ cugraph_error_code_t cugraph_test_sample_result_create( const cugraph_resource_handle_t* handle, const cugraph_type_erased_device_array_view_t* srcs, const cugraph_type_erased_device_array_view_t* dsts, - const cugraph_type_erased_device_array_view_t* weights, - const cugraph_type_erased_device_array_view_t* counts, + const cugraph_type_erased_device_array_view_t* edge_id, + const cugraph_type_erased_device_array_view_t* edge_type, + const cugraph_type_erased_device_array_view_t* wgt, + const cugraph_type_erased_device_array_view_t* hop, + const cugraph_type_erased_device_array_view_t* label, cugraph_sample_result_t** result, cugraph_error_t** error); diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp index c0f3ae581d4..0383823317c 100644 --- a/cpp/src/c_api/uniform_neighbor_sampling.cpp +++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp @@ -526,8 +526,11 @@ extern "C" cugraph_error_code_t cugraph_test_sample_result_create( const cugraph_resource_handle_t* handle, const cugraph_type_erased_device_array_view_t* srcs, const cugraph_type_erased_device_array_view_t* dsts, - const cugraph_type_erased_device_array_view_t* weights, - const cugraph_type_erased_device_array_view_t* counts, + const cugraph_type_erased_device_array_view_t* edge_id, + const cugraph_type_erased_device_array_view_t* edge_type, + const cugraph_type_erased_device_array_view_t* wgt, + const cugraph_type_erased_device_array_view_t* hop, + const cugraph_type_erased_device_array_view_t* label, cugraph_sample_result_t** result, cugraph_error_t** error) { @@ -567,23 +570,61 @@ extern "C" cugraph_error_code_t cugraph_test_sample_result_create( device_array_unique_ptr_t new_device_dsts(new_device_dsts_ptr, &cugraph_type_erased_device_array_free); - // copy weights to new device array - cugraph_type_erased_device_array_t* new_device_weights_ptr{nullptr}; - error_code = cugraph_type_erased_device_array_create_from_view( - handle, weights, &new_device_weights_ptr, error); - if (error_code != CUGRAPH_SUCCESS) return error_code; + // copy edge_id to new device array + cugraph_type_erased_device_array_t* new_device_edge_id_ptr{nullptr}; + + if (edge_id != NULL) { + error_code = cugraph_type_erased_device_array_create_from_view( + handle, edge_id, &new_device_edge_id_ptr, error); + if (error_code != CUGRAPH_SUCCESS) return error_code; + } - device_array_unique_ptr_t new_device_weights(new_device_weights_ptr, + device_array_unique_ptr_t new_device_edge_id(new_device_edge_id_ptr, &cugraph_type_erased_device_array_free); - // copy counts to new device array - cugraph_type_erased_device_array_t* new_device_counts_ptr{nullptr}; - error_code = cugraph_type_erased_device_array_create_from_view( - handle, counts, &new_device_counts_ptr, error); + // copy edge_type to new device array + cugraph_type_erased_device_array_t* new_device_edge_type_ptr{nullptr}; + + if (edge_type != NULL) { + error_code = cugraph_type_erased_device_array_create_from_view( + handle, edge_type, &new_device_edge_type_ptr, error); + if (error_code != CUGRAPH_SUCCESS) return error_code; + } + + device_array_unique_ptr_t new_device_edge_type(new_device_edge_type_ptr, + &cugraph_type_erased_device_array_free); + + // copy wgt to new device array + cugraph_type_erased_device_array_t* new_device_wgt_ptr{nullptr}; + if (wgt != NULL) { + error_code = + cugraph_type_erased_device_array_create_from_view(handle, wgt, &new_device_wgt_ptr, error); + if (error_code != CUGRAPH_SUCCESS) return error_code; + } + + device_array_unique_ptr_t new_device_wgt(new_device_wgt_ptr, + &cugraph_type_erased_device_array_free); + + // copy hop to new device array + cugraph_type_erased_device_array_t* new_device_hop_ptr{nullptr}; + error_code = + cugraph_type_erased_device_array_create_from_view(handle, hop, &new_device_hop_ptr, error); if (error_code != CUGRAPH_SUCCESS) return error_code; - device_array_unique_ptr_t new_device_counts(new_device_counts_ptr, - &cugraph_type_erased_device_array_free); + device_array_unique_ptr_t new_device_hop(new_device_hop_ptr, + &cugraph_type_erased_device_array_free); + + // copy label to new device array + cugraph_type_erased_device_array_t* new_device_label_ptr{nullptr}; + + if (label != NULL) { + error_code = cugraph_type_erased_device_array_create_from_view( + handle, label, &new_device_label_ptr, error); + if (error_code != CUGRAPH_SUCCESS) return error_code; + } + + device_array_unique_ptr_t new_device_label(new_device_label_ptr, + &cugraph_type_erased_device_array_free); // create new cugraph_sample_result_t *result = reinterpret_cast(new cugraph::c_api::cugraph_sample_result_t{ @@ -591,12 +632,15 @@ extern "C" cugraph_error_code_t cugraph_test_sample_result_create( new_device_srcs.release()), reinterpret_cast( new_device_dsts.release()), - nullptr, reinterpret_cast( - new_device_weights.release()), - nullptr, + new_device_edge_id.release()), reinterpret_cast( - new_device_counts.release())}); + new_device_edge_type.release()), + reinterpret_cast(new_device_wgt.release()), + reinterpret_cast( + new_device_label.release()), + reinterpret_cast(new_device_hop.release()), + nullptr}); return CUGRAPH_SUCCESS; } diff --git a/cpp/src/community/legacy/spectral_clustering.cu b/cpp/src/community/legacy/spectral_clustering.cu index 59067a35927..ea3885a801a 100644 --- a/cpp/src/community/legacy/spectral_clustering.cu +++ b/cpp/src/community/legacy/spectral_clustering.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,9 @@ #include #include +#if defined RAFT_DISTANCE_COMPILED #include +#endif #include #include diff --git a/python/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/CMakeLists.txt index eb0ecd42457..00ac3afe12b 100644 --- a/python/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index 862f01e1034..b1b52128d99 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -13,7 +13,6 @@ # ============================================================================= add_subdirectory(components) -add_subdirectory(raft/common) add_subdirectory(internal_types) add_subdirectory(testing) diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 903ad4ce550..e20b68693fc 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -218,8 +218,11 @@ cdef extern from "cugraph_c/algorithms.h": const cugraph_resource_handle_t* handle, const cugraph_type_erased_device_array_view_t* srcs, const cugraph_type_erased_device_array_view_t* dsts, - const cugraph_type_erased_device_array_view_t* weights, - const cugraph_type_erased_device_array_view_t* counts, + const cugraph_type_erased_device_array_view_t* edge_id, + const cugraph_type_erased_device_array_view_t* edge_type, + const cugraph_type_erased_device_array_view_t* wgt, + const cugraph_type_erased_device_array_view_t* hop, + const cugraph_type_erased_device_array_view_t* label, cugraph_sample_result_t** result, cugraph_error_t** error ) diff --git a/python/pylibcugraph/pylibcugraph/raft/__init__.py b/python/pylibcugraph/pylibcugraph/raft/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/pylibcugraph/pylibcugraph/raft/common/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/raft/common/CMakeLists.txt deleted file mode 100644 index 2daaa579bdc..00000000000 --- a/python/pylibcugraph/pylibcugraph/raft/common/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources - cuda.pyx - handle.pyx -) -set(linked_libraries cugraph::cugraph) - -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - ASSOCIATED_TARGETS cugraph -) diff --git a/python/pylibcugraph/pylibcugraph/raft/common/TODO b/python/pylibcugraph/pylibcugraph/raft/common/TODO deleted file mode 100644 index b5dfadc3abe..00000000000 --- a/python/pylibcugraph/pylibcugraph/raft/common/TODO +++ /dev/null @@ -1,6 +0,0 @@ -FIXME: The contents of this directory should be used from RAFT directly, rather than -being duplicated here. - -pylibcugraph should be able to remove the copy of raft code added here once the connected components -API is updated to use the new cugraph C API. The only reason RAFT is being pulled in is because the -legacy connected components in pylibcugraph is using it. \ No newline at end of file diff --git a/python/pylibcugraph/pylibcugraph/raft/common/__init__.py b/python/pylibcugraph/pylibcugraph/raft/common/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/pylibcugraph/pylibcugraph/raft/common/cuda.pxd b/python/pylibcugraph/pylibcugraph/raft/common/cuda.pxd deleted file mode 100644 index ae6246dee18..00000000000 --- a/python/pylibcugraph/pylibcugraph/raft/common/cuda.pxd +++ /dev/null @@ -1,22 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from cuda.ccudart cimport cudaStream_t - -cdef class Stream: - cdef cudaStream_t s - - cdef cudaStream_t getStream(self) diff --git a/python/pylibcugraph/pylibcugraph/raft/common/cuda.pyx b/python/pylibcugraph/pylibcugraph/raft/common/cuda.pyx deleted file mode 100644 index 9b35aebdba6..00000000000 --- a/python/pylibcugraph/pylibcugraph/raft/common/cuda.pyx +++ /dev/null @@ -1,84 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from cuda.ccudart cimport( - cudaStream_t, - cudaError_t, - cudaSuccess, - cudaStreamCreate, - cudaStreamDestroy, - cudaStreamSynchronize, - cudaGetLastError, - cudaGetErrorString, - cudaGetErrorName -) - - -class CudaRuntimeError(RuntimeError): - def __init__(self, extraMsg=None): - cdef cudaError_t e = cudaGetLastError() - cdef bytes errMsg = cudaGetErrorString(e) - cdef bytes errName = cudaGetErrorName(e) - msg = "Error! %s reason='%s'" % (errName.decode(), errMsg.decode()) - if extraMsg is not None: - msg += " extraMsg='%s'" % extraMsg - super(CudaRuntimeError, self).__init__(msg) - - -cdef class Stream: - """ - Stream represents a thin-wrapper around cudaStream_t and its operations. - - Examples - -------- - - .. code-block:: python - - from pylibraft.common.cuda import Stream - stream = Stream() - stream.sync() - del stream # optional! - """ - def __cinit__(self): - cdef cudaStream_t stream - cdef cudaError_t e = cudaStreamCreate(&stream) - if e != cudaSuccess: - raise CudaRuntimeError("Stream create") - self.s = stream - - def __dealloc__(self): - self.sync() - cdef cudaError_t e = cudaStreamDestroy(self.s) - if e != cudaSuccess: - raise CudaRuntimeError("Stream destroy") - - def sync(self): - """ - Synchronize on the cudastream owned by this object. Note that this - could raise exception due to issues with previous asynchronous - launches - """ - cdef cudaError_t e = cudaStreamSynchronize(self.s) - if e != cudaSuccess: - raise CudaRuntimeError("Stream sync") - - cdef cudaStream_t getStream(self): - return self.s diff --git a/python/pylibcugraph/pylibcugraph/raft/common/handle.pxd b/python/pylibcugraph/pylibcugraph/raft/common/handle.pxd deleted file mode 100644 index ed162053e1d..00000000000 --- a/python/pylibcugraph/pylibcugraph/raft/common/handle.pxd +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - - -from libcpp.memory cimport shared_ptr -from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.cuda_stream_pool cimport cuda_stream_pool -from libcpp.memory cimport shared_ptr -from libcpp.memory cimport unique_ptr - -cdef extern from "raft/core/handle.hpp" namespace "raft" nogil: - cdef cppclass handle_t: - handle_t() except + - handle_t(cuda_stream_view stream_view) except + - handle_t(cuda_stream_view stream_view, - shared_ptr[cuda_stream_pool] stream_pool) except + - cuda_stream_view get_stream() except + - void sync_stream() except + - -cdef class Handle: - cdef unique_ptr[handle_t] c_obj - cdef shared_ptr[cuda_stream_pool] stream_pool - cdef int n_streams diff --git a/python/pylibcugraph/pylibcugraph/raft/common/handle.pyx b/python/pylibcugraph/pylibcugraph/raft/common/handle.pyx deleted file mode 100644 index b9e4029eb01..00000000000 --- a/python/pylibcugraph/pylibcugraph/raft/common/handle.pyx +++ /dev/null @@ -1,90 +0,0 @@ -# -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -# import raft -from libcpp.memory cimport shared_ptr -from rmm._lib.cuda_stream_view cimport cuda_stream_per_thread -from rmm._lib.cuda_stream_view cimport cuda_stream_view - -from .cuda cimport Stream -from .cuda import CudaRuntimeError - - -cdef class Handle: - """ - Handle is a lightweight python wrapper around the corresponding C++ class - of handle_t exposed by RAFT's C++ interface. Refer to the header file - raft/core/handle.hpp for interface level details of this struct - - Examples - -------- - - .. code-block:: python - - from pylibraft.common import Stream, Handle - stream = Stream() - handle = Handle(stream) - - # call algos here - - # final sync of all work launched in the stream of this handle - # this is same as `pylibraft.cuda.Stream.sync()` call, but safer in case - # the default stream inside the `handle_t` is being used - handle.sync() - del handle # optional! - """ - - def __cinit__(self, stream: Stream = None, n_streams=0): - self.n_streams = n_streams - if n_streams > 0: - self.stream_pool.reset(new cuda_stream_pool(n_streams)) - - cdef cuda_stream_view c_stream - if stream is None: - # this constructor will construct a "main" handle on - # per-thread default stream, which is non-blocking - self.c_obj.reset(new handle_t(cuda_stream_per_thread, - self.stream_pool)) - else: - # this constructor constructs a handle on user stream - c_stream = cuda_stream_view(stream.getStream()) - self.c_obj.reset(new handle_t(c_stream, - self.stream_pool)) - - def sync(self): - """ - Issues a sync on the stream set for this handle. - """ - self.c_obj.get()[0].sync_stream() - - def getHandle(self): - return self.c_obj.get() - - def __getstate__(self): - return self.n_streams - - def __setstate__(self, state): - self.n_streams = state - if self.n_streams > 0: - self.stream_pool.reset(new cuda_stream_pool(self.n_streams)) - - self.c_obj.reset(new handle_t(cuda_stream_per_thread, - self.stream_pool)) diff --git a/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py b/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py index 9d05232074a..4ebb6f1895e 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -109,3 +109,54 @@ def test_sg_graph(graph_data): renumber=False, do_expensive_check=False, ) + + +def test_SGGraph_create_from_cudf(): + """ + Smoke test to ensure an SGGraph can be created from a cuDF DataFrame + without raising exceptions, crashing, etc. This currently does not assert + correctness of the graph in any way. + """ + # FIXME: other PLC tests are using cudf so this does not add a new dependency, + # however, PLC tests should consider having fewer external dependencies, meaning + # this and other tests would be changed to not use cudf. + import cudf + + # Importing this cugraph class seems to cause a crash more reliably (2023-01-22) + # from cugraph.structure.graph_implementation import simpleGraphImpl + from pylibcugraph import ( + ResourceHandle, + GraphProperties, + SGGraph, + ) + + print("get edgelist...", end="", flush=True) + edgelist = cudf.DataFrame( + { + "src": [0, 1, 2], + "dst": [1, 2, 4], + "wgt": [0.0, 0.1, 0.2], + } + ) + + print("edgelist = ", edgelist) + print("done", flush=True) + print("create Graph...", end="", flush=True) + + graph_props = GraphProperties(is_multigraph=False, is_symmetric=False) + + plc_graph = SGGraph( + resource_handle=ResourceHandle(), + graph_properties=graph_props, + src_or_offset_array=edgelist["src"], + dst_or_index_array=edgelist["dst"], + weight_array=edgelist["wgt"], + edge_id_array=None, + edge_type_array=None, + store_transposed=False, + renumber=False, + do_expensive_check=True, + input_array_format="COO", + ) + print("done", flush=True) + print(f"created SGGraph {plc_graph=}", flush=True)