Skip to content

Commit

Permalink
Merge pull request #1241 from seunghwak/bug_personalized_pagerank
Browse files (browse the repository at this point in the history)
[REVIEW] BUG Personalized PageRank bug fix
  • Loading branch information
BradReesWork authored Oct 27, 2020
2 parents 6450729 + 0cfbd95 commit c8d8e23
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
- PR #1174 Fix bugs in MNMG pattern accelerators and pattern accelerator based implementations of MNMG PageRank, BFS, and SSSP
- PR #1233 Temporarily disabling C++ tests for 0.16
- PR #1240 Require `ucx-proc=*=gpu`
- PR #1241 Fix a bug in personalized PageRank with the new graph primitives API.


# cuGraph 0.15.0 (26 Aug 2020)
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/experimental/katz_centrality.cu
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ void katz_centrality(raft::handle_t &handle,

iter++;

if (diff_sum < static_cast<result_t>(num_vertices) * epsilon) {
if (diff_sum < epsilon) {
break;
} else if (iter >= max_iterations) {
CUGRAPH_FAIL("Katz Centrality failed to converge.");
Expand Down
25 changes: 17 additions & 8 deletions cpp/src/experimental/pagerank.cu
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,20 @@ void pagerank(raft::handle_t const& handle,
auto const num_vertices = pull_graph_view.get_number_of_vertices();
if (num_vertices == 0) { return; }

auto aggregate_personalization_vector_size =
GraphViewType::is_multi_gpu
? host_scalar_allreduce(handle.get_comms(), personalization_vector_size, handle.get_stream())
: personalization_vector_size;

// 1. check input arguments

CUGRAPH_EXPECTS(
(personalization_vertices == nullptr) || (personalization_values != nullptr),
"Invalid input argument: if personalization verties are provided, personalization "
"values should be provided as well.");
((personalization_vector_size > 0) && (personalization_vertices != nullptr) &&
(personalization_values != nullptr)) ||
((personalization_vector_size == 0) && (personalization_vertices == nullptr) &&
(personalization_values == nullptr)),
"Invalid input argument: if personalization_vector_size is non-zero, personalization verties "
"and personalization values should be provided. Otherwise, they should not be provided.");
CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0),
"Invalid input argument: alpha should be in [0.0, 1.0].");
CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative.");
Expand Down Expand Up @@ -109,7 +117,7 @@ void pagerank(raft::handle_t const& handle,
"Invalid input argument: initial guess values should be non-negative.");
}

if (personalization_vertices != nullptr) {
if (aggregate_personalization_vector_size > 0) {
vertex_partition_device_t<GraphViewType> vertex_partition(pull_graph_view);
auto num_invalid_vertices =
count_if_v(handle,
Expand Down Expand Up @@ -177,7 +185,7 @@ void pagerank(raft::handle_t const& handle,
// 4. sum the personalization values

result_t personalization_sum{0.0};
if (personalization_vertices != nullptr) {
if (aggregate_personalization_vector_size > 0) {
personalization_sum = reduce_v(handle,
pull_graph_view,
personalization_values,
Expand Down Expand Up @@ -229,7 +237,7 @@ void pagerank(raft::handle_t const& handle,

copy_to_adj_matrix_row(handle, pull_graph_view, pageranks, adj_matrix_row_pageranks.begin());

auto unvarying_part = personalization_vertices == nullptr
auto unvarying_part = aggregate_personalization_vector_size == 0
? (dangling_sum * alpha + static_cast<result_t>(1.0 - alpha)) /
static_cast<result_t>(num_vertices)
: result_t{0.0};
Expand All @@ -245,11 +253,12 @@ void pagerank(raft::handle_t const& handle,
unvarying_part,
pageranks);

if (personalization_vertices != nullptr) {
if (aggregate_personalization_vector_size > 0) {
vertex_partition_device_t<GraphViewType> vertex_partition(pull_graph_view);
auto val_first = thrust::make_zip_iterator(
thrust::make_tuple(personalization_vertices, personalization_values));
thrust::for_each(
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
val_first,
val_first + personalization_vector_size,
[vertex_partition, pageranks, dangling_sum, personalization_sum, alpha] __device__(
Expand All @@ -271,7 +280,7 @@ void pagerank(raft::handle_t const& handle,

iter++;

if (diff_sum < static_cast<result_t>(num_vertices) * epsilon) {
if (diff_sum < epsilon) {
break;
} else if (iter >= max_iterations) {
CUGRAPH_FAIL("PageRank failed to converge.");
Expand Down
16 changes: 11 additions & 5 deletions cpp/tests/experimental/katz_centrality_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ void katz_centrality_reference(edge_t* offsets,

result_t diff_sum{0.0};
for (vertex_t i = 0; i < num_vertices; ++i) {
diff_sum += fabs(katz_centralities[i] - old_katz_centralities[i]);
diff_sum += std::abs(katz_centralities[i] - old_katz_centralities[i]);
}
if (diff_sum < static_cast<result_t>(num_vertices) * epsilon) { break; }
if (diff_sum < epsilon) { break; }
iter++;
ASSERT_TRUE(iter < max_iterations);
}
Expand Down Expand Up @@ -164,7 +164,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam<KatzCentrality_Usec
epsilon,
std::numeric_limits<size_t>::max(),
false,
false);
true);

rmm::device_uvector<result_t> d_katz_centralities(graph_view.get_number_of_vertices(),
handle.get_stream());
Expand All @@ -180,7 +180,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam<KatzCentrality_Usec
epsilon,
std::numeric_limits<size_t>::max(),
false,
false,
true,
false);

CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
Expand All @@ -193,7 +193,13 @@ class Tests_KatzCentrality : public ::testing::TestWithParam<KatzCentrality_Usec
handle.get_stream());
CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));

auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; };
auto threshold_ratio = 1e-3;
auto threshold_magnitude =
(epsilon / static_cast<result_t>(graph_view.get_number_of_vertices())) * threshold_ratio;
auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
auto diff = std::abs(lhs - rhs);
return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude);
};

ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(),
h_reference_katz_centralities.end(),
Expand Down
110 changes: 86 additions & 24 deletions cpp/tests/experimental/pagerank_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <iterator>
#include <limits>
#include <numeric>
#include <random>
#include <vector>

template <typename vertex_t, typename edge_t, typename weight_t, typename result_t>
Expand Down Expand Up @@ -94,20 +95,21 @@ void pagerank_reference(edge_t* offsets,
pageranks[i] += alpha * old_pageranks[nbr] * (w / out_weight_sums[nbr]);
}
if (personalization_vertices == nullptr) {
pageranks[i] += (dangling_sum + (1.0 - alpha)) / static_cast<result_t>(num_vertices);
pageranks[i] +=
(dangling_sum * alpha + (1.0 - alpha)) / static_cast<result_t>(num_vertices);
}
}
if (personalization_vertices != nullptr) {
for (vertex_t i = 0; i < personalization_vector_size; ++i) {
auto v = personalization_vertices[i];
pageranks[v] += (dangling_sum + (1.0 - alpha)) * personalization_values[i];
pageranks[v] += (dangling_sum * alpha + (1.0 - alpha)) * personalization_values[i];
}
}
result_t diff_sum{0.0};
for (vertex_t i = 0; i < num_vertices; ++i) {
diff_sum += fabs(pageranks[i] - old_pageranks[i]);
diff_sum += std::abs(pageranks[i] - old_pageranks[i]);
}
if (diff_sum < static_cast<result_t>(num_vertices) * epsilon) { break; }
if (diff_sum < epsilon) { break; }
iter++;
ASSERT_TRUE(iter < max_iterations);
}
Expand All @@ -117,10 +119,13 @@ void pagerank_reference(edge_t* offsets,

typedef struct PageRank_Usecase_t {
std::string graph_file_full_path{};
double personalization_ratio{0.0};
bool test_weighted{false};

PageRank_Usecase_t(std::string const& graph_file_path, bool test_weighted)
: test_weighted(test_weighted)
PageRank_Usecase_t(std::string const& graph_file_path,
double personalization_ratio,
bool test_weighted)
: personalization_ratio(personalization_ratio), test_weighted(test_weighted)
{
if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) {
graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path;
Expand Down Expand Up @@ -169,6 +174,49 @@ class Tests_PageRank : public ::testing::TestWithParam<PageRank_Usecase> {
}
CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));

std::vector<vertex_t> h_personalization_vertices{};
std::vector<result_t> h_personalization_values{};
if (configuration.personalization_ratio > 0.0) {
std::random_device r{};
std::default_random_engine generator{r()};
std::uniform_real_distribution<double> distribution{0.0, 1.0};
h_personalization_vertices.resize(graph_view.get_number_of_local_vertices());
std::iota(h_personalization_vertices.begin(),
h_personalization_vertices.end(),
graph_view.get_local_vertex_first());
h_personalization_vertices.erase(
std::remove_if(h_personalization_vertices.begin(),
h_personalization_vertices.end(),
[&generator, &distribution, configuration](auto v) {
return distribution(generator) >= configuration.personalization_ratio;
}),
h_personalization_vertices.end());
h_personalization_values.resize(h_personalization_vertices.size());
std::for_each(h_personalization_values.begin(),
h_personalization_values.end(),
[&distribution, &generator](auto& val) { val = distribution(generator); });
auto sum = std::accumulate(
h_personalization_values.begin(), h_personalization_values.end(), result_t{0.0});
std::for_each(h_personalization_values.begin(),
h_personalization_values.end(),
[sum](auto& val) { val /= sum; });
}

rmm::device_uvector<vertex_t> d_personalization_vertices(h_personalization_vertices.size(),
handle.get_stream());
rmm::device_uvector<result_t> d_personalization_values(d_personalization_vertices.size(),
handle.get_stream());
if (d_personalization_vertices.size() > 0) {
raft::update_device(d_personalization_vertices.data(),
h_personalization_vertices.data(),
h_personalization_vertices.size(),
handle.get_stream());
raft::update_device(d_personalization_values.data(),
h_personalization_values.data(),
h_personalization_values.size(),
handle.get_stream());
}

std::vector<result_t> h_reference_pageranks(graph_view.get_number_of_vertices());

result_t constexpr alpha{0.85};
Expand All @@ -177,11 +225,11 @@ class Tests_PageRank : public ::testing::TestWithParam<PageRank_Usecase> {
pagerank_reference(h_offsets.data(),
h_indices.data(),
h_weights.size() > 0 ? h_weights.data() : static_cast<weight_t*>(nullptr),
static_cast<vertex_t*>(nullptr),
static_cast<result_t*>(nullptr),
h_personalization_vertices.data(),
h_personalization_values.data(),
h_reference_pageranks.data(),
graph_view.get_number_of_vertices(),
vertex_t{0},
static_cast<vertex_t>(h_personalization_vertices.size()),
alpha,
epsilon,
std::numeric_limits<size_t>::max(),
Expand All @@ -195,9 +243,9 @@ class Tests_PageRank : public ::testing::TestWithParam<PageRank_Usecase> {
cugraph::experimental::pagerank(handle,
graph_view,
static_cast<weight_t*>(nullptr),
static_cast<vertex_t*>(nullptr),
static_cast<result_t*>(nullptr),
vertex_t{0},
d_personalization_vertices.data(),
d_personalization_values.data(),
static_cast<vertex_t>(d_personalization_vertices.size()),
d_pageranks.begin(),
alpha,
epsilon,
Expand All @@ -213,7 +261,13 @@ class Tests_PageRank : public ::testing::TestWithParam<PageRank_Usecase> {
h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream());
CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));

auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; };
auto threshold_ratio = 1e-3;
auto threshold_magnitude =
(epsilon / static_cast<result_t>(graph_view.get_number_of_vertices())) * threshold_ratio;
auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
auto diff = std::abs(lhs - rhs);
return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude);
};

ASSERT_TRUE(std::equal(h_reference_pageranks.begin(),
h_reference_pageranks.end(),
Expand All @@ -229,16 +283,24 @@ TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat)
run_current_test<int32_t, int32_t, float, float>(GetParam());
}

INSTANTIATE_TEST_CASE_P(simple_test,
Tests_PageRank,
::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", false),
PageRank_Usecase("test/datasets/karate.mtx", true),
PageRank_Usecase("test/datasets/web-Google.mtx", false),
PageRank_Usecase("test/datasets/web-Google.mtx", true),
PageRank_Usecase("test/datasets/ljournal-2008.mtx",
false),
PageRank_Usecase("test/datasets/ljournal-2008.mtx", true),
PageRank_Usecase("test/datasets/webbase-1M.mtx", false),
PageRank_Usecase("test/datasets/webbase-1M.mtx", true)));
INSTANTIATE_TEST_CASE_P(
simple_test,
Tests_PageRank,
::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", 0.0, false),
PageRank_Usecase("test/datasets/karate.mtx", 0.5, false),
PageRank_Usecase("test/datasets/karate.mtx", 0.0, true),
PageRank_Usecase("test/datasets/karate.mtx", 0.5, true),
PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false),
PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false),
PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true),
PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true),
PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false),
PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false),
PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true),
PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true),
PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false),
PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false),
PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true),
PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true)));

CUGRAPH_TEST_PROGRAM_MAIN()

0 comments on commit c8d8e23

Please sign in to comment.