Skip to content

Commit

Permalink
Refactor MG C++ tests (handle initialization) (rapidsai#2439)
Browse files Browse the repository at this point in the history
Update MG C++ tests to initialize a RAFT handle_t object in SetUpTestCase().

Previously, a handle_t object is initialized inside individual tests. If an exception is thrown inside a single test, a stack unwinding process starts, and this calls handle_t's destructor, and the destructor invokes ncclCommDestroy(). If only a subset of workers fail, some processes will still advance and can be blocked on a NCCL call, This bars ncclCommDestroy() from finishing, and the stack unwinding process will hang. The outcome is no output in the console and testers may have no clue that exceptions are thrown.

With this restructuring, exceptions in individual tests will not destroy a handle_t object, so testers will see the exception messages.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Kumar Aatish (https://github.com/kaatish)
  - Joseph Nke (https://github.com/jnke2016)

URL: rapidsai#2439
  • Loading branch information
seunghwak authored Jul 27, 2022
1 parent 2c50989 commit 830d113
Show file tree
Hide file tree
Showing 74 changed files with 1,575 additions and 1,590 deletions.
10 changes: 5 additions & 5 deletions cpp/include/cugraph/partition_manager.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -65,11 +65,10 @@ using pair_comms_t =
// naming policy defaults to simplified naming:
// one key per row subcomms, one per column subcomms;
//
template <typename name_policy_t = key_naming_t, typename size_type = int>
template <typename name_policy_t = key_naming_t>
class subcomm_factory_t {
public:
subcomm_factory_t(raft::handle_t& handle, size_type row_size)
: handle_(handle), row_size_(row_size)
subcomm_factory_t(raft::handle_t& handle, int row_size) : handle_(handle), row_size_(row_size)
{
init_row_col_comms();
}
Expand Down Expand Up @@ -101,8 +100,9 @@ class subcomm_factory_t {

private:
raft::handle_t& handle_;
size_type row_size_;
int row_size_;
pair_comms_t row_col_subcomms_;
};

} // namespace partition_2d
} // namespace cugraph
4 changes: 2 additions & 2 deletions cpp/src/utilities/cython.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1065,8 +1065,8 @@ std::unique_ptr<renum_tuple_t<vertex_t, edge_t>> call_renumber(
// Helper for setting up subcommunicators
void init_subcomms(raft::handle_t& handle, size_t row_comm_size)
{
partition_2d::subcomm_factory_t<partition_2d::key_naming_t, int> subcomm_factory(handle,
row_comm_size);
partition_2d::subcomm_factory_t<partition_2d::key_naming_t> subcomm_factory(handle,
row_comm_size);
}

// Explicit instantiations
Expand Down
4 changes: 3 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ target_link_libraries(cugraphtestutil


add_library(cugraphmgtestutil STATIC
utilities/device_comm_wrapper.cu)
utilities/device_comm_wrapper.cu
utilities/mg_utilities.cpp)

set_property(TARGET cugraphmgtestutil PROPERTY POSITION_INDEPENDENT_CODE ON)

Expand All @@ -64,6 +65,7 @@ target_link_libraries(cugraphmgtestutil
PUBLIC
cugraph::cugraph
NCCL::NCCL
MPI::MPI_CXX
)

###################################################################################################
Expand Down
40 changes: 18 additions & 22 deletions cpp/tests/bcast/mg_graph_bcast.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,7 @@
//
#include <utilities/base_fixture.hpp>
#include <utilities/device_comm_wrapper.hpp>
#include <utilities/mg_utilities.hpp>
#include <utilities/test_utilities.hpp>

#include <cugraph/graph_functions.hpp>
Expand Down Expand Up @@ -59,10 +60,11 @@ struct GraphBcast_Usecase {
// test. In this case, each test is identical except for the inputs and
// expected outputs, so the entire test is defined in the run_test() method.
//
class GraphBcast_MG_Testfixture : public ::testing::TestWithParam<GraphBcast_Usecase> {
class Tests_MGGraphBcast : public ::testing::TestWithParam<GraphBcast_Usecase> {
public:
static void SetUpTestCase() {}
static void TearDownTestCase() {}
static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); }

static void TearDownTestCase() { handle_.reset(); }

// Run once for each test instance
//
Expand All @@ -76,42 +78,36 @@ class GraphBcast_MG_Testfixture : public ::testing::TestWithParam<GraphBcast_Use
template <typename vertex_t, typename edge_t, typename weight_t>
void run_test(const GraphBcast_Usecase& param)
{
using namespace cugraph::broadcast;
using sg_graph_t = cugraph::graph_t<vertex_t, edge_t, weight_t, false, false>;

raft::handle_t handle;

raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD);
const auto& comm = handle.get_comms();

auto const comm_rank = comm.get_rank();

auto [sg_graph, d_renumber_map_labels] =
cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, false, false>(
handle, param.graph_file_full_path, true, /*renumber=*/false);
*handle_, param.graph_file_full_path, true, /*renumber=*/false);

if (comm_rank == 0) {
graph_broadcast(handle, &sg_graph);
if (handle_->get_comms().get_rank() == 0) {
cugraph::broadcast::graph_broadcast(*handle_, &sg_graph);
} else {
sg_graph_t* g_ignore{nullptr};
auto graph_copy = graph_broadcast(handle, g_ignore);
auto [same, str_fail] = cugraph::test::compare_graphs(handle, sg_graph, graph_copy);
auto graph_copy = cugraph::broadcast::graph_broadcast(*handle_, g_ignore);
auto [same, str_fail] = cugraph::test::compare_graphs(*handle_, sg_graph, graph_copy);

if (!same) std::cerr << "Graph comparison failed on " << str_fail << '\n';

ASSERT_TRUE(same);
}
}

private:
static std::unique_ptr<raft::handle_t> handle_;
};

std::unique_ptr<raft::handle_t> Tests_MGGraphBcast::handle_ = nullptr;

////////////////////////////////////////////////////////////////////////////////
TEST_P(GraphBcast_MG_Testfixture, CheckInt32Int32Float)
{
run_test<int32_t, int32_t, float>(GetParam());
}
TEST_P(Tests_MGGraphBcast, CheckInt32Int32Float) { run_test<int32_t, int32_t, float>(GetParam()); }

INSTANTIATE_TEST_SUITE_P(simple_test,
GraphBcast_MG_Testfixture,
Tests_MGGraphBcast,
::testing::Values(GraphBcast_Usecase("test/datasets/karate.mtx")
//,GraphBcast_Usecase("test/datasets/smallworld.mtx")
));
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/c_api/mg_test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ extern "C" void* create_raft_handle(int prows)
raft::handle_t* handle = new raft::handle_t{};
raft::comms::initialize_mpi_comms(handle, MPI_COMM_WORLD);

cugraph::partition_2d::subcomm_factory_t<cugraph::partition_2d::key_naming_t, int>
subcomm_factory(*handle, prows);
cugraph::partition_2d::subcomm_factory_t<cugraph::partition_2d::key_naming_t> subcomm_factory(
*handle, prows);

return handle;
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/centrality/eigenvector_centrality_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ class Tests_EigenvectorCentrality
: public ::testing::TestWithParam<std::tuple<EigenvectorCentrality_Usecase, input_usecase_t>> {
public:
Tests_EigenvectorCentrality() {}
static void SetupTestCase() {}

static void SetUpTestCase() {}
static void TearDownTestCase() {}

virtual void SetUp() {}
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/centrality/katz_centrality_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ class Tests_KatzCentrality
: public ::testing::TestWithParam<std::tuple<KatzCentrality_Usecase, input_usecase_t>> {
public:
Tests_KatzCentrality() {}
static void SetupTestCase() {}

static void SetUpTestCase() {}
static void TearDownTestCase() {}

virtual void SetUp() {}
Expand Down
4 changes: 3 additions & 1 deletion cpp/tests/centrality/legacy/betweenness_centrality_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,13 @@ class Tests_BC : public ::testing::TestWithParam<BC_Usecase> {

public:
Tests_BC() {}
static void SetupTestCase() {}

static void SetUpTestCase() {}
static void TearDownTestCase() {}

virtual void SetUp() {}
virtual void TearDown() {}

// vertex_t vertex identifier data type
// edge_t edge identifier data type
// weight_t edge weight data type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,13 @@ class Tests_EdgeBC : public ::testing::TestWithParam<EdgeBC_Usecase> {

public:
Tests_EdgeBC() {}
static void SetupTestCase() {}

static void SetUpTestCase() {}
static void TearDownTestCase() {}

virtual void SetUp() {}
virtual void TearDown() {}

// FIXME: Should normalize be part of the configuration instead?
// vertex_t vertex identifier data type
// edge_t edge identifier data type
Expand Down
4 changes: 3 additions & 1 deletion cpp/tests/centrality/legacy/katz_centrality_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,10 @@ typedef struct Katz_Usecase_t {
class Tests_Katz : public ::testing::TestWithParam<Katz_Usecase> {
public:
Tests_Katz() {}
static void SetupTestCase() {}

static void SetUpTestCase() {}
static void TearDownTestCase() {}

virtual void SetUp() {}
virtual void TearDown() {}

Expand Down
Loading

0 comments on commit 830d113

Please sign in to comment.