Update references to CHECK_CUDA, CUDA_CHECK and CUDA_TRY to use new RAFT_ names (rapidsai#2000)

Replaces rapidsai#1975

Update all references to `CHECK_CUDA`, `CUDA_CHECK` and `CUDA_TRY` to use the new `RAFT_`-prefixed names.

Also update calls to `cudaStreamSynchronize` to use the `handle.sync_stream()` method.
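As a rough sketch of the migration pattern (not an excerpt from this commit — the helper function, its arguments, and the exact RAFT include paths are assumptions and may differ between RAFT versions):

```cpp
#include <raft/cudart_utils.h>  // RAFT_CUDA_TRY / RAFT_CHECK_CUDA (assumed include path)
#include <raft/handle.hpp>      // raft::handle_t (assumed include path)

#include <cuda_runtime_api.h>

// Hypothetical helper (not part of this diff) showing the renamed macros and sync_stream().
void copy_counts_to_host(raft::handle_t const& handle, int* h_dst, int const* d_src, size_t n)
{
  // Before: CUDA_TRY(cudaMemcpyAsync(...));
  RAFT_CUDA_TRY(
    cudaMemcpyAsync(h_dst, d_src, n * sizeof(int), cudaMemcpyDeviceToHost, handle.get_stream()));

  // Before: CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
  handle.sync_stream();

  // Before: CHECK_CUDA(handle.get_stream());
  RAFT_CHECK_CUDA(handle.get_stream());
}
```

The hunks below apply this kind of substitution file by file.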

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Rick Ratzel (https://github.com/rlratzel)

URL: rapidsai#2000
ChuckHastings authored Jan 6, 2022
1 parent c49f049 commit ba0e2d2
Showing 76 changed files with 519 additions and 535 deletions.
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -956,7 +956,7 @@ void update_frontier_v_push_if_out_nbr(
d_offsets.begin());
std::vector<vertex_t> h_offsets(d_offsets.size());
raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream());
CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
h_offsets.push_back(matrix_partition_frontier_size);
// FIXME: we may further improve performance by 1) concurrently running kernels on different
// segments; 2) individually tuning block sizes for different segments; and 3) adding one more
7 changes: 4 additions & 3 deletions cpp/src/centrality/betweenness_centrality.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -95,7 +95,7 @@ vertex_t get_total_number_of_sources(raft::handle_t const& handle, vertex_t loca
raft::comms::op_t::SUM,
handle.get_stream());
total_number_of_sources_used = d_number_of_sources.value(handle.get_stream());
// CUDA_TRY(
// RAFT_CUDA_TRY(
// cudaMemcpy(&total_number_of_sources_used, data, sizeof(vertex_t), cudaMemcpyDeviceToHost));
}
return total_number_of_sources_used;
@@ -237,7 +237,8 @@ void BC<vertex_t, edge_t, weight_t, result_t>::compute_single_source(vertex_t so
auto current_max_depth =
thrust::max_element(handle_.get_thrust_policy(), distances_, distances_ + number_of_vertices_);
vertex_t max_depth = 0;
CUDA_TRY(cudaMemcpy(&max_depth, current_max_depth, sizeof(vertex_t), cudaMemcpyDeviceToHost));
RAFT_CUDA_TRY(
cudaMemcpy(&max_depth, current_max_depth, sizeof(vertex_t), cudaMemcpyDeviceToHost));
// Step 2) Dependency accumulation
accumulate(source_vertex, max_depth);
}
32 changes: 16 additions & 16 deletions cpp/src/community/legacy/triangles_counting.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -96,13 +96,13 @@ static inline void cubSum(InputIteratorT d_in,

cub::DeviceReduce::Sum(
nullptr, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);

rmm::device_buffer d_temp_storage(temp_storage_bytes, stream);

cub::DeviceReduce::Sum(
d_temp_storage.data(), temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);

return;
}
@@ -130,7 +130,7 @@ static inline void cubIf(InputIteratorT d_in,
select_op,
stream,
debug_synchronous);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);

rmm::device_buffer d_temp_storage(temp_storage_bytes, stream);

@@ -143,7 +143,7 @@ static inline void cubIf(InputIteratorT d_in,
select_op,
stream,
debug_synchronous);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);

return;
}
@@ -287,7 +287,7 @@ void tricnt_b2b(T nblock,
// still best overall (with no psum)
tricnt_b2b_k<THREADS, 32, BLK_BWL0><<<nblock, THREADS, 0, stream>>>(
m->nrows, m->rows_d, m->roff_d, m->cols_d, ocnt_d, bmapL0_d, bmldL0, bmapL1_d, bmldL1);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);
return;
}

@@ -395,7 +395,7 @@ void tricnt_bsh(T nblock, spmat_t<T>* m, uint64_t* ocnt_d, size_t bmld, cudaStre
{
tricnt_bsh_k<THREADS, 32><<<nblock, THREADS, sizeof(unsigned int) * bmld, stream>>>(
m->nrows, m->rows_d, m->roff_d, m->cols_d, ocnt_d, bmld);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);
return;
}

@@ -543,7 +543,7 @@ void tricnt_wrp(
dim3 block(32, THREADS / 32);
tricnt_wrp_ps_k<32, THREADS / 32, WP_LEN_TH1, WP_LEN_TH2>
<<<nblock, block, 0, stream>>>(m->nrows, m->rows_d, m->roff_d, m->cols_d, ocnt_d, bmap_d, bmld);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);
return;
}

@@ -631,7 +631,7 @@ void tricnt_thr(T nblock, spmat_t<T>* m, uint64_t* ocnt_d, cudaStream_t stream)

tricnt_thr_k<THREADS, TH_CENT_K_LOCLEN>
<<<nblock, THREADS, 0, stream>>>(m->nrows, m->rows_d, m->roff_d, m->cols_d, ocnt_d);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);
return;
}

@@ -657,7 +657,7 @@ void create_nondangling_vector(

cubIf(it, p_nonempty, out_num.data().get(), n, temp_func, stream);
cudaMemcpy(n_nonempty, out_num.data().get(), sizeof(*n_nonempty), cudaMemcpyDeviceToHost);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);
}

template <typename T>
@@ -666,7 +666,7 @@ uint64_t reduce(uint64_t* v_d, T n, cudaStream_t stream)
rmm::device_vector<uint64_t> tmp(1);

cubSum(v_d, tmp.data().get(), n, stream);
CHECK_CUDA(stream);
RAFT_CHECK_CUDA(stream);

return tmp[0];
}
@@ -717,12 +717,12 @@ TrianglesCount<IndexType>::TrianglesCount(IndexType num_vertices,
cudaGetDevice(&device_id);

cudaDeviceGetAttribute(&m_shared_mem_per_block, cudaDevAttrMaxSharedMemoryPerBlock, device_id);
CHECK_CUDA(m_stream);
RAFT_CHECK_CUDA(m_stream);
cudaDeviceGetAttribute(&m_multi_processor_count, cudaDevAttrMultiProcessorCount, device_id);
CHECK_CUDA(m_stream);
RAFT_CHECK_CUDA(m_stream);
cudaDeviceGetAttribute(
&m_max_threads_per_multi_processor, cudaDevAttrMaxThreadsPerMultiProcessor, device_id);
CHECK_CUDA(m_stream);
RAFT_CHECK_CUDA(m_stream);

m_seq.resize(m_mat.N, IndexType{0});
create_nondangling_vector(m_mat.roff_d, m_seq.data().get(), &(m_mat.nrows), m_mat.N, m_stream);
@@ -758,7 +758,7 @@ void TrianglesCount<IndexType>::tcount_b2b()

size_t free_bytes, total_bytes;
cudaMemGetInfo(&free_bytes, &total_bytes);
CHECK_CUDA(m_stream);
RAFT_CHECK_CUDA(m_stream);

size_t nblock_available = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmldL1);

@@ -793,7 +793,7 @@ void TrianglesCount<IndexType>::tcount_wrp()
// number of blocks limited by birmap size
size_t free_bytes, total_bytes;
cudaMemGetInfo(&free_bytes, &total_bytes);
CHECK_CUDA(m_stream);
RAFT_CHECK_CUDA(m_stream);

size_t nblock_available = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmld * (THREADS / 32));

10 changes: 6 additions & 4 deletions cpp/src/components/utils.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -92,7 +92,7 @@ class Exception : public std::exception {
template <typename Type>
void copy(Type* dst, const Type* src, size_t len, cudaStream_t stream)
{
CUDA_TRY(cudaMemcpyAsync(dst, src, len * sizeof(Type), cudaMemcpyDefault, stream));
RAFT_CUDA_TRY(cudaMemcpyAsync(dst, src, len * sizeof(Type), cudaMemcpyDefault, stream));
}

/**
@@ -118,7 +118,8 @@ void updateHost(Type* hPtr, const Type* dPtr, size_t len, cudaStream_t stream)
template <typename Type>
void copyAsync(Type* dPtr1, const Type* dPtr2, size_t len, cudaStream_t stream)
{
CUDA_TRY(cudaMemcpyAsync(dPtr1, dPtr2, len * sizeof(Type), cudaMemcpyDeviceToDevice, stream));
RAFT_CUDA_TRY(
cudaMemcpyAsync(dPtr1, dPtr2, len * sizeof(Type), cudaMemcpyDeviceToDevice, stream));
}
/** @} */

@@ -189,7 +190,8 @@ void myPrintDevVector(const char* variableName,
OutStream& out)
{
std::vector<T> hostMem(componentsCount);
CUDA_TRY(cudaMemcpy(hostMem.data(), devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
RAFT_CUDA_TRY(
cudaMemcpy(hostMem.data(), devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
myPrintHostVector(variableName, hostMem.data(), componentsCount, out);
}

14 changes: 7 additions & 7 deletions cpp/src/components/weak_cc.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -165,22 +165,22 @@ void weak_cc_label_batched(vertex_t* labels,
weak_cc_init_label_kernel<vertex_t, TPB_X>
<<<blocks, threads, 0, stream>>>(labels, startVertexId, batchSize, MAX_LABEL, filter_op);

CUDA_TRY(cudaPeekAtLastError());
RAFT_CUDA_TRY(cudaPeekAtLastError());

int n_iters = 0;
do {
CUDA_TRY(cudaMemsetAsync(state.m, false, sizeof(bool), stream));
RAFT_CUDA_TRY(cudaMemsetAsync(state.m, false, sizeof(bool), stream));

weak_cc_label_device<vertex_t, edge_t, TPB_X><<<blocks, threads, 0, stream>>>(
labels, offsets, indices, nnz, state.fa, state.xa, state.m, startVertexId, batchSize);
CUDA_TRY(cudaPeekAtLastError());
CUDA_TRY(cudaStreamSynchronize(stream));
RAFT_CUDA_TRY(cudaPeekAtLastError());
RAFT_CUDA_TRY(cudaStreamSynchronize(stream));

thrust::swap(state.fa, state.xa);

//** Updating m *
MLCommon::updateHost(&host_m, state.m, 1, stream);
CUDA_TRY(cudaStreamSynchronize(stream));
RAFT_CUDA_TRY(cudaStreamSynchronize(stream));

n_iters++;
} while (host_m);
@@ -235,7 +235,7 @@ void weak_cc_batched(vertex_t* labels,
if (startVertexId == 0) {
weak_cc_init_all_kernel<vertex_t, TPB_X>
<<<blocks, threads, 0, stream>>>(labels, state.fa, state.xa, N, MAX_LABEL);
CUDA_TRY(cudaPeekAtLastError());
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

weak_cc_label_batched<vertex_t, edge_t, TPB_X>(
14 changes: 7 additions & 7 deletions cpp/src/converters/COOtoCSR.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -69,27 +69,27 @@ VT sort(legacy::GraphCOOView<VT, ET, WT>& graph, rmm::cuda_stream_view stream_vi
graph.dst_indices,
graph.dst_indices + graph.number_of_edges,
thrust::make_zip_iterator(thrust::make_tuple(graph.src_indices, graph.edge_data)));
CUDA_TRY(cudaMemcpy(
RAFT_CUDA_TRY(cudaMemcpy(
&max_dst_id, &(graph.dst_indices[graph.number_of_edges - 1]), sizeof(VT), cudaMemcpyDefault));
thrust::stable_sort_by_key(
rmm::exec_policy(stream_view),
graph.src_indices,
graph.src_indices + graph.number_of_edges,
thrust::make_zip_iterator(thrust::make_tuple(graph.dst_indices, graph.edge_data)));
CUDA_TRY(cudaMemcpy(
RAFT_CUDA_TRY(cudaMemcpy(
&max_src_id, &(graph.src_indices[graph.number_of_edges - 1]), sizeof(VT), cudaMemcpyDefault));
} else {
thrust::stable_sort_by_key(rmm::exec_policy(stream_view),
graph.dst_indices,
graph.dst_indices + graph.number_of_edges,
graph.src_indices);
CUDA_TRY(cudaMemcpy(
RAFT_CUDA_TRY(cudaMemcpy(
&max_dst_id, &(graph.dst_indices[graph.number_of_edges - 1]), sizeof(VT), cudaMemcpyDefault));
thrust::stable_sort_by_key(rmm::exec_policy(stream_view),
graph.src_indices,
graph.src_indices + graph.number_of_edges,
graph.dst_indices);
CUDA_TRY(cudaMemcpy(
RAFT_CUDA_TRY(cudaMemcpy(
&max_src_id, &(graph.src_indices[graph.number_of_edges - 1]), sizeof(VT), cudaMemcpyDefault));
}
return std::max(max_src_id, max_dst_id) + 1;
@@ -177,10 +177,10 @@ void coo_to_csr_inplace(legacy::GraphCOOView<VT, ET, WT>& graph,
graph.number_of_edges,
stream_view);

CUDA_TRY(cudaMemcpy(
RAFT_CUDA_TRY(cudaMemcpy(
result.indices, graph.dst_indices, sizeof(VT) * graph.number_of_edges, cudaMemcpyDefault));
if (graph.has_data())
CUDA_TRY(cudaMemcpy(
RAFT_CUDA_TRY(cudaMemcpy(
result.edge_data, graph.edge_data, sizeof(WT) * graph.number_of_edges, cudaMemcpyDefault));
}

24 changes: 12 additions & 12 deletions cpp/src/layout/barnes_hut.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -80,7 +80,7 @@ void barnes_hut(raft::handle_t const& handle,
float* radiusd = d_radiusd.data();

InitializationKernel<<<1, 1, 0, stream_view.value()>>>(limiter, maxdepthd, radiusd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

const int FOUR_NNODES = 4 * nnodes;
const int FOUR_N = 4 * n;
@@ -158,10 +158,10 @@ void barnes_hut(raft::handle_t const& handle,

// Sort COO for coalesced memory access.
sort(graph, stream_view.value());
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

graph.degree(massl, cugraph::legacy::DegreeDirection::OUT);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

const vertex_t* row = graph.src_indices;
const vertex_t* col = graph.dst_indices;
@@ -204,7 +204,7 @@ void barnes_hut(raft::handle_t const& handle,
thrust::fill(handle.get_thrust_policy(), d_traction.begin(), d_traction.end(), 0.f);

ResetKernel<<<1, 1, 0, stream_view.value()>>>(radiusd_squared, bottomd, NNODES, radiusd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

// Compute bounding box arround all bodies
BoundingBoxKernel<<<blocks * FACTOR1, THREADS1, 0, stream_view.value()>>>(
@@ -222,28 +222,28 @@ void barnes_hut(raft::handle_t const& handle,
n,
limiter,
radiusd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

ClearKernel1<<<blocks, 1024, 0, stream_view.value()>>>(childl, FOUR_NNODES, FOUR_N);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

// Build quadtree
TreeBuildingKernel<<<blocks * FACTOR2, THREADS2, 0, stream_view.value()>>>(
childl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, maxdepthd, bottomd, radiusd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

ClearKernel2<<<blocks, 1024, 0, stream_view.value()>>>(startl, massl, NNODES, bottomd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

// Summarizes mass and position for each cell, bottom up approach
SummarizationKernel<<<blocks * FACTOR3, THREADS3, 0, stream_view.value()>>>(
countl, childl, massl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, bottomd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

// Group closed bodies together, used to speed up Repulsion kernel
SortKernel<<<blocks * FACTOR4, THREADS4, 0, stream_view.value()>>>(
sortl, countl, startl, childl, NNODES, n, bottomd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

// Force computation O(n . log(n))
RepulsionKernel<<<blocks * FACTOR5, THREADS5, 0, stream_view.value()>>>(scaling_ratio,
@@ -262,7 +262,7 @@ void barnes_hut(raft::handle_t const& handle,
n,
radiusd_squared,
maxdepthd);
CHECK_CUDA(stream_view.value());
RAFT_CHECK_CUDA(stream_view.value());

apply_gravity<vertex_t>(nodes_pos,
nodes_pos + nnodes + 1,