Skip to content

Commit

Permalink
Merge pull request #954 from seunghwak/enh_ext_error_check
Browse files Browse the repository at this point in the history
[REVIEW] Use RAFT error handling mechanism
  • Loading branch information
BradReesWork authored Jun 19, 2020
2 parents 8814e9e + fed4cf2 commit 9f71b90
Show file tree
Hide file tree
Showing 63 changed files with 357 additions and 487 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
- PR #946 Install meta packages for dependencies
- PR #952 Updated get_test_data.sh to also (optionally) download and install datasets for benchmark runs
- PR #953 fix setting RAFT_DIR from the RAFT_PATH env var
- PR #954 Update cuGraph error handling to use RAFT

## Bug Fixes
- PR #936 Update Force Atlas 2 doc and wrapper
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ else(DEFINED ENV{RAFT_PATH})

ExternalProject_Add(raft
GIT_REPOSITORY https://github.com/rapidsai/raft.git
GIT_TAG 2487eb0c12f374729043baa5448c0d309c921e60
GIT_TAG 3038c773636540941b8bf9e30c487f20bddc3cdc
PREFIX ${RAFT_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
Expand Down
65 changes: 65 additions & 0 deletions cpp/include/utilities/error.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <raft/error.hpp>

namespace cugraph {

/**
 * @brief Exception thrown when a logical precondition is violated.
 *
 * Not meant to be thrown directly; it is constructed and thrown by the
 * CUGRAPH_EXPECTS and CUGRAPH_FAIL macros.
 *
 */
struct logic_error : public raft::exception {
  /** Construct from a `char const*` description, forwarded to raft::exception. */
  explicit logic_error(char const* const what_arg) : raft::exception(what_arg)
  {
  }

  /** Construct from a `std::string` description, forwarded to raft::exception. */
  explicit logic_error(std::string const& what_arg) : raft::exception(what_arg)
  {
  }
};

} // namespace cugraph

/**
 * @brief Macro for checking (pre-)conditions that throws an exception when a condition is false
 *
 * The message is assembled by SET_ERROR_MSG (presumably provided by <raft/error.hpp> -- confirm)
 * with the prefix "cuGraph failure at ", followed by the formatted description.
 *
 * @param[in] cond Expression that evaluates to true or false
 * @param[in] fmt String literal description of the reason that cond is expected to be true with
 * optional format tags
 * @throw cugraph::logic_error if the condition evaluates to false.
 */
#define CUGRAPH_EXPECTS(cond, fmt, ...)                                               \
  do {                                                                                \
    if (!(cond)) {                                                                    \
      /* Deliberately unusual name: the caller's format arguments are expanded */     \
      /* inside this scope, so a plain `msg` local would capture a caller */          \
      /* variable of the same name. */                                                \
      std::string cugraph_expects_msg_{};                                             \
      SET_ERROR_MSG(cugraph_expects_msg_, "cuGraph failure at ", fmt, ##__VA_ARGS__); \
      throw cugraph::logic_error(cugraph_expects_msg_);                               \
    }                                                                                 \
  } while (0)

/**
 * @brief Indicates that an erroneous code path has been taken.
 *
 * The message is assembled by SET_ERROR_MSG (presumably provided by <raft/error.hpp> -- confirm)
 * with the prefix "cuGraph failure at ", followed by the formatted description.
 *
 * @param[in] fmt String literal description of the reason that this code path is erroneous with
 * optional format tags
 * @throw always throws cugraph::logic_error
 */
#define CUGRAPH_FAIL(fmt, ...)                                                   \
  do {                                                                           \
    /* Deliberately unusual name: the caller's format arguments are expanded */  \
    /* inside this scope, so a plain `msg` local would capture a caller */       \
    /* variable of the same name. */                                             \
    std::string cugraph_fail_msg_{};                                             \
    SET_ERROR_MSG(cugraph_fail_msg_, "cuGraph failure at ", fmt, ##__VA_ARGS__); \
    throw cugraph::logic_error(cugraph_fail_msg_);                               \
  } while (0)
5 changes: 3 additions & 2 deletions cpp/src/centrality/betweenness_centrality.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@

#include <thrust/transform.h>

#include <raft/cudart_utils.h>

#include <algorithms.hpp>
#include <graph.hpp>

#include <utilities/error_utils.h>
#include <utilities/error.hpp>

#include "betweenness_centrality.cuh"
#include "betweenness_centrality_kernels.cuh"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/centrality/katz_centrality.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <Hornet.hpp>
#include <Static/KatzCentrality/Katz.cuh>
#include <graph.hpp>
#include "utilities/error_utils.h"
#include "utilities/error.hpp"

namespace cugraph {

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/community/ECG.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

#include <rmm/thrust_rmm_allocator.h>
#include <thrust/random.h>
#include <utilities/error_utils.h>
#include <converters/permute_graph.cuh>
#include <ctime>
#include <utilities/error.hpp>
#include "utilities/graph_utils.cuh"

namespace {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/community/extract_subgraph_by_vertex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <utilities/cuda_utils.cuh>

#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.h>
#include <utilities/error.hpp>

namespace {

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/community/ktruss.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
* @file ktruss.cu
* --------------------------------------------------------------------------*/

#include <utilities/error_utils.h>
#include <utilities/error.hpp>

#include <Hornet.hpp>
#include <StandardAPI.hpp>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/community/louvain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

#include <community/louvain_kernels.hpp>

#include "utilities/error_utils.h"
#include "utilities/error.hpp"

namespace cugraph {

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/community/louvain_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ void generate_superverticies_graph(
new_number_of_vertices,
current_graph.number_of_edges,
stream);
CUDA_CHECK_LAST();
CHECK_CUDA(stream);

src_indices_v.resize(current_graph.number_of_edges);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/community/spectral_clustering.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
#include <nvgraph/include/sm_utils.h>
#include <rmm/thrust_rmm_allocator.h>
#include <thrust/transform.h>
#include <utilities/error_utils.h>
#include <ctime>
#include <nvgraph/include/nvgraph_error.hxx>
#include <utilities/error.hpp>

#include <nvgraph/include/modularity_maximization.hxx>
#include <nvgraph/include/nvgraph_cublas.hxx>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/components/connectivity.cu
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <graph.hpp>
#include <iostream>
#include <type_traits>
#include "utilities/error_utils.h"
#include "utilities/error.hpp"
#include "utilities/graph_utils.cuh"

#include "topology/topology.cuh"
Expand Down
40 changes: 6 additions & 34 deletions cpp/src/components/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
#include <stdexcept>
#include <string>

#include <utilities/error_utils.h>
#include <raft/cudart_utils.h>

#include <utilities/error.hpp>

namespace MLCommon {

Expand Down Expand Up @@ -77,35 +79,6 @@ class Exception : public std::exception {
}
};

/** macro to throw a runtime error */
#define THROW(fmt, ...) \
do { \
std::string msg; \
char errMsg[2048]; \
std::sprintf(errMsg, "Exception occured! file=%s line=%d: ", __FILE__, __LINE__); \
msg += errMsg; \
std::sprintf(errMsg, fmt, ##__VA_ARGS__); \
msg += errMsg; \
throw MLCommon::Exception(msg); \
} while (0)

/** macro to check for a conditional and assert on failure */
#define ASSERT(check, fmt, ...) \
do { \
if (!(check)) THROW(fmt, ##__VA_ARGS__); \
} while (0)

/** check for cuda runtime API errors and assert accordingly */
#define CUDA_CHECK(call) \
do { \
cudaError_t status = call; \
ASSERT( \
status == cudaSuccess, "FAIL: call='%s'. Reason:%s\n", #call, cudaGetErrorString(status)); \
} while (0)

///@todo: add a similar CUDA_CHECK_NO_THROW
/// (Ref: https://github.com/rapidsai/cuml/issues/229)

/**
* @brief Generic copy method for all kinds of transfers
* @tparam Type data type
Expand All @@ -117,7 +90,7 @@ class Exception : public std::exception {
template <typename Type>
void copy(Type* dst, const Type* src, size_t len, cudaStream_t stream)
{
CUDA_CHECK(cudaMemcpyAsync(dst, src, len * sizeof(Type), cudaMemcpyDefault, stream));
CUDA_TRY(cudaMemcpyAsync(dst, src, len * sizeof(Type), cudaMemcpyDefault, stream));
}

/**
Expand All @@ -143,7 +116,7 @@ void updateHost(Type* hPtr, const Type* dPtr, size_t len, cudaStream_t stream)
template <typename Type>
void copyAsync(Type* dPtr1, const Type* dPtr2, size_t len, cudaStream_t stream)
{
CUDA_CHECK(cudaMemcpyAsync(dPtr1, dPtr2, len * sizeof(Type), cudaMemcpyDeviceToDevice, stream));
CUDA_TRY(cudaMemcpyAsync(dPtr1, dPtr2, len * sizeof(Type), cudaMemcpyDeviceToDevice, stream));
}
/** @} */

Expand Down Expand Up @@ -214,8 +187,7 @@ void myPrintDevVector(const char* variableName,
OutStream& out)
{
std::vector<T> hostMem(componentsCount);
CUDA_CHECK(
cudaMemcpy(hostMem.data(), devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
CUDA_TRY(cudaMemcpy(hostMem.data(), devMem, componentsCount * sizeof(T), cudaMemcpyDeviceToHost));
myPrintHostVector(variableName, hostMem.data(), componentsCount, out);
}

Expand Down
14 changes: 8 additions & 6 deletions cpp/src/components/weak_cc.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include <iostream>
#include <type_traits>

#include <raft/cudart_utils.h>

#include <rmm/thrust_rmm_allocator.h>
#include "utilities/cuda_utils.cuh"
#include "utils.h"
Expand Down Expand Up @@ -163,22 +165,22 @@ void weak_cc_label_batched(vertex_t *labels,
weak_cc_init_label_kernel<vertex_t, TPB_X>
<<<blocks, threads, 0, stream>>>(labels, startVertexId, batchSize, MAX_LABEL, filter_op);

CUDA_CHECK(cudaPeekAtLastError());
CUDA_TRY(cudaPeekAtLastError());

int n_iters = 0;
do {
CUDA_CHECK(cudaMemsetAsync(state.m, false, sizeof(bool), stream));
CUDA_TRY(cudaMemsetAsync(state.m, false, sizeof(bool), stream));

weak_cc_label_device<vertex_t, edge_t, TPB_X><<<blocks, threads, 0, stream>>>(
labels, offsets, indices, nnz, state.fa, state.xa, state.m, startVertexId, batchSize);
CUDA_CHECK(cudaPeekAtLastError());
CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_TRY(cudaPeekAtLastError());
CUDA_TRY(cudaStreamSynchronize(stream));

thrust::swap(state.fa, state.xa);

//** Updating m *
MLCommon::updateHost(&host_m, state.m, 1, stream);
CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_TRY(cudaStreamSynchronize(stream));

n_iters++;
} while (host_m);
Expand Down Expand Up @@ -233,7 +235,7 @@ void weak_cc_batched(vertex_t *labels,
if (startVertexId == 0) {
weak_cc_init_all_kernel<vertex_t, TPB_X>
<<<blocks, threads, 0, stream>>>(labels, state.fa, state.xa, N, MAX_LABEL);
CUDA_CHECK(cudaPeekAtLastError());
CUDA_TRY(cudaPeekAtLastError());
}

weak_cc_label_batched<vertex_t, edge_t, TPB_X>(
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/converters/COOtoCSR.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include <algorithm>

#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.h>
#include <utilities/error.hpp>

#include <cub/device/device_radix_sort.cuh>
#include <cub/device/device_run_length_encode.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/converters/permute_graph.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
* limitations under the License.
*/
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.h>
#include <graph.hpp>
#include <utilities/error.hpp>
#include "converters/COOtoCSR.cuh"
#include "utilities/graph_utils.cuh"

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/converters/renumber.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/scan.h>

#include <raft/cudart_utils.h>
#include <rmm/device_buffer.hpp>

#include <utilities/error.hpp>
#include "sort/bitonic.cuh"
#include "utilities/error_utils.h"
#include "utilities/graph_utils.cuh"

namespace cugraph {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/cores/core_number.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
*/

#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.h>
#include <Hornet.hpp>
#include <Static/CoreNumber/CoreNumber.cuh>
#include <graph.hpp>
#include <utilities/error.hpp>
//#include <nvgraph_gdf.h>

namespace cugraph {
Expand Down
9 changes: 7 additions & 2 deletions cpp/src/db/db_object.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@
* limitations under the License.
*/

#include <db/db_object.cuh>

#include <utilities/error.hpp>

#include <raft/cudart_utils.h>
#include <rmm/thrust_rmm_allocator.h>

#include <thrust/binary_search.h>
#include <utilities/error_utils.h>
#include <cub/device/device_run_length_encode.cuh>
#include <db/db_object.cuh>

#include <sstream>

namespace cugraph {
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/db/db_operators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@
* limitations under the License.
*/

#include <utilities/error_utils.h>
#include <cub/device/device_select.cuh>
#include <db/db_operators.cuh>

#include <utilities/error.hpp>

#include <raft/cudart_utils.h>

#include <cub/device/device_select.cuh>

namespace cugraph {
namespace db {
template <typename IndexType>
Expand Down
Loading

0 comments on commit 9f71b90

Please sign in to comment.