From b45fb8dabccd884d4e3852ef198826b6d14ce92b Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Wed, 29 Mar 2023 23:43:50 +0200 Subject: [PATCH] Replace CUDA_TRY with RAFT_CUDA_TRY (#3389) Replace CUDA_TRY with RAFT_CUDA_TRY in three files. Also replaced CHECK_CUDA with RAFT_CHECK_CUDA https://github.com/rapidsai/raft/pull/1304 removed the old macros, we still had a few references. Currently blocked by https://github.com/rapidsai/cugraph-ops/pull/448... once that is merged we should be able to finish building and merge this. Authors: - Naim (https://github.com/naimnv) - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/3389 --- .../include/hash/concurrent_unordered_map.cuh | 14 +++++++------- cpp/libcugraph_etl/src/renumbering.cu | 18 +++++++++--------- cpp/tests/structure/streams.cu | 4 ++-- cpp/tests/utilities/mg_utilities.cpp | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh index c48ee2fb792..f097f9c43a2 100644 --- a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh +++ b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -437,7 +437,7 @@ class concurrent_unordered_map { m_hashtbl_values = m_allocator.allocate(m_capacity, stream); } - CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values, + RAFT_CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values, other.m_hashtbl_values, m_capacity * sizeof(value_type), cudaMemcpyDefault, @@ -465,10 +465,10 @@ class concurrent_unordered_map { cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values); if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { - CUDA_TRY(cudaMemPrefetchAsync( + RAFT_CUDA_TRY(cudaMemPrefetchAsync( m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value())); } - CUDA_TRY(cudaMemPrefetchAsync(this, sizeof(*this), dev_id, stream.value())); + RAFT_CUDA_TRY(cudaMemPrefetchAsync(this, sizeof(*this), dev_id, stream.value())); } /** @@ -537,14 +537,14 @@ class concurrent_unordered_map { if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { int dev_id = 0; - CUDA_TRY(cudaGetDevice(&dev_id)); - CUDA_TRY(cudaMemPrefetchAsync( + RAFT_CUDA_TRY(cudaGetDevice(&dev_id)); + RAFT_CUDA_TRY(cudaMemPrefetchAsync( m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value())); } } init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( m_hashtbl_values, m_capacity, m_unused_key, m_unused_element); - CUDA_TRY(cudaGetLastError()); + RAFT_CUDA_TRY(cudaGetLastError()); } }; diff --git a/cpp/libcugraph_etl/src/renumbering.cu b/cpp/libcugraph_etl/src/renumbering.cu index 89c8bd3d792..bbcdb69781b 100644 --- a/cpp/libcugraph_etl/src/renumbering.cu +++ b/cpp/libcugraph_etl/src/renumbering.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -801,7 +801,7 @@ struct renumber_functor { float load_factor = 0.7; rmm::device_uvector<accum_type> atomic_agg(32, exec_strm); // just padded to 32 - CHECK_CUDA(cudaMemsetAsync(atomic_agg.data(), 0, sizeof(accum_type), exec_strm)); + RAFT_CHECK_CUDA(cudaMemsetAsync(atomic_agg.data(), 0, sizeof(accum_type), exec_strm)); auto cuda_map_obj = cudf_map_type::create( std::max(static_cast<size_t>(static_cast<double>(num_rows) / load_factor), @@ -839,9 +839,9 @@ struct renumber_functor { *cuda_map_obj, atomic_agg.data()); - CHECK_CUDA(cudaMemcpy( + RAFT_CHECK_CUDA(cudaMemcpy( hist_insert_counter, atomic_agg.data(), sizeof(accum_type), cudaMemcpyDeviceToHost)); - CHECK_CUDA(cudaStreamSynchronize(exec_strm)); + RAFT_CHECK_CUDA(cudaStreamSynchronize(exec_strm)); accum_type key_value_count = hist_insert_counter[0]; // {row, count} pairs, sortDesecending on count w/ custom comparator @@ -931,7 +931,7 @@ struct renumber_functor { key_value_count, hist_insert_counter); - CHECK_CUDA(cudaStreamSynchronize(exec_strm)); + RAFT_CHECK_CUDA(cudaStreamSynchronize(exec_strm)); // allocate output columns buffers rmm::device_buffer unrenumber_col1_chars(hist_insert_counter[0], exec_strm); rmm::device_buffer unrenumber_col2_chars(hist_insert_counter[1], exec_strm); @@ -955,7 +955,7 @@ struct renumber_functor { reinterpret_cast(unrenumber_col2_chars.data()), out_col1_offsets.data(), out_col2_offsets.data()); - CHECK_CUDA(cudaStreamSynchronize(exec_strm)); // do we need sync here?? + RAFT_CHECK_CUDA(cudaStreamSynchronize(exec_strm)); // do we need sync here?? 
std::vector<std::unique_ptr<cudf::column>> renumber_table_vectors; @@ -1005,7 +1005,7 @@ struct renumber_functor { grid.x = (key_value_count - 1) / block.x + 1; create_mapping_histogram<<<grid, block, 0, exec_strm>>>( sort_value.data(), sort_key.data(), *cuda_map_obj_mapping, key_value_count); - CHECK_CUDA(cudaStreamSynchronize(exec_strm)); + RAFT_CHECK_CUDA(cudaStreamSynchronize(exec_strm)); rmm::device_buffer src_buffer(sizeof(Dtype) * num_rows, exec_strm); rmm::device_buffer dst_buffer(sizeof(Dtype) * num_rows, exec_strm); @@ -1021,7 +1021,7 @@ struct renumber_functor { num_rows, *cuda_map_obj_mapping, reinterpret_cast<Dtype*>(src_buffer.data())); - CHECK_CUDA(cudaStreamSynchronize(exec_strm)); + RAFT_CHECK_CUDA(cudaStreamSynchronize(exec_strm)); set_dst_vertex_idx<<<grid, block, 0, exec_strm>>>( dst_vertex_chars_ptrs[0], dst_vertex_offset_ptrs[0], @@ -1042,7 +1042,7 @@ struct renumber_functor { cols_vector.push_back(std::unique_ptr<cudf::column>( new cudf::column(cudf::data_type(cudf::type_id::INT32), num_rows, std::move(dst_buffer)))); - CHECK_CUDA(cudaDeviceSynchronize()); + RAFT_CHECK_CUDA(cudaDeviceSynchronize()); mr.deallocate(hist_insert_counter, hist_size); diff --git a/cpp/tests/structure/streams.cu b/cpp/tests/structure/streams.cu index 19371eb794f..3891f2b9b7a 100644 --- a/cpp/tests/structure/streams.cu +++ b/cpp/tests/structure/streams.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,7 +47,7 @@ TEST_F(StreamTest, basic_test) v.begin(), v.begin(), 2 * thrust::placeholders::_1 + thrust::placeholders::_2); - CUDA_TRY(cudaStreamSynchronize(handle.get_next_usable_stream(i))); + RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_next_usable_stream(i))); }, i); } diff --git a/cpp/tests/utilities/mg_utilities.cpp b/cpp/tests/utilities/mg_utilities.cpp index a22831a0043..9cac26da435 100644 --- a/cpp/tests/utilities/mg_utilities.cpp +++ b/cpp/tests/utilities/mg_utilities.cpp @@ -88,7 +88,7 @@ void enforce_p2p_initialization(raft::comms::comms_t const& comm, rmm::cuda_stre rx_ranks, stream); - CUDA_TRY(cudaStreamSynchronize(stream)); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); } } // namespace test