-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathmaximum_matching_vertex_based.cu
137 lines (120 loc) · 5.49 KB
/
maximum_matching_vertex_based.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <iostream>
#include <thrust/device_vector.h>
#include <cuda_runtime.h>
#include "maximum_matching_vertex_based.h"
#include "time_measure_util.h"
#include "rama_utils.h"
#define numThreads 256
__device__ int get_best_neighbour(const int row_index, float max_value,
const int* const __restrict__ row_offsets,
const int* const __restrict__ col_ids,
const float* const __restrict__ data,
const int* const __restrict__ ignore_vertices)
{
int n_id = -1;
for(int l = row_offsets[row_index]; l < row_offsets[row_index + 1]; ++l)
{
float current_val = data[l];
int current_n = col_ids[l];
if (current_val <= max_value || ignore_vertices[current_n] || current_n == row_index) // self edges can be present with 0 cost.
continue;
max_value = current_val;
n_id = current_n;
}
return n_id;
}
__global__ void pick_best_neighbour(const int num_nodes,
const float max_value,
const int* const __restrict__ row_offsets,
const int* const __restrict__ col_ids,
const float* const __restrict__ costs,
const int* const __restrict__ v_matched,
int* __restrict__ v_best_neighbours)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int num_threads = blockDim.x * gridDim.x;
for (int v = tid; v < num_nodes; v += num_threads)
{
if (v_matched[v])
continue;
v_best_neighbours[v] = get_best_neighbour(v, max_value, row_offsets, col_ids, costs, v_matched);
}
}
__global__ void match_neighbours(const int num_nodes,
const int* const __restrict__ v_best_neighbours,
int* __restrict__ v_matched,
int* __restrict__ node_mapping,
bool* __restrict__ still_running)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int num_threads = blockDim.x * gridDim.x;
for (int v = tid; v < num_nodes; v += num_threads)
{
if (v_matched[v])
continue;
int v_best = v_best_neighbours[v];
if (v_best == -1 || v_matched[v_best])
continue;
if (v_best_neighbours[v_best] == v)
{
node_mapping[max(v_best, v)] = min(v_best, v);
v_matched[v] = 1;
v_matched[v_best] = 1;
*still_running = true;
}
}
}
float determine_matching_threshold(const dCOO& A, const float mean_multiplier_mm)
{
thrust::device_vector<float> data = A.get_data();
auto first = thrust::make_zip_iterator(thrust::make_tuple(thrust::constant_iterator<int>(1), data.begin()));
auto last = thrust::make_zip_iterator(thrust::make_tuple(thrust::constant_iterator<int>(1) + data.size(), data.end()));
// compute average of positive edge costs:
auto red = thrust::transform_reduce(first, last, pos_part(), thrust::make_tuple(0, 0.0f), tuple_sum());
if (thrust::get<0>(red) == 0)
return -1.0;
return mean_multiplier_mm * thrust::get<1>(red) / thrust::get<0>(red);
}
std::tuple<thrust::device_vector<int>, int> filter_edges_by_matching_vertex_based(const dCOO& A, const float mean_multiplier_mm, const bool verbose)
{
assert(!A.is_directed());
thrust::device_vector<int> v_best_neighbours(A.rows(), -1);
thrust::device_vector<int> v_matched(A.rows(), 0);
thrust::device_vector<int> node_mapping(A.rows());
thrust::sequence(node_mapping.begin(), node_mapping.end(), 0);
int numBlocks = ceil(A.rows() / (float) numThreads);
thrust::device_vector<bool> still_running(1);
const thrust::device_vector<int> A_row_offsets = A.compute_row_offsets();
const float min_edge_weight_to_match = determine_matching_threshold(A, mean_multiplier_mm);
if (min_edge_weight_to_match < 0)
return {node_mapping, 0};
int prev_num_edges = 0;
for (int t = 0; t < 10; t++)
{
thrust::fill(thrust::device, still_running.begin(), still_running.end(), false);
pick_best_neighbour<<<numBlocks, numThreads>>>(A.rows(), min_edge_weight_to_match,
thrust::raw_pointer_cast(A_row_offsets.data()),
A.get_col_ids_ptr(),
A.get_data_ptr(),
thrust::raw_pointer_cast(v_matched.data()),
thrust::raw_pointer_cast(v_best_neighbours.data()));
match_neighbours<<<numBlocks, numThreads>>>(A.rows(),
thrust::raw_pointer_cast(v_best_neighbours.data()),
thrust::raw_pointer_cast(v_matched.data()),
thrust::raw_pointer_cast(node_mapping.data()),
thrust::raw_pointer_cast(still_running.data()));
int current_num_edges = thrust::reduce(v_matched.begin(), v_matched.end(), 0);
float rel_increase = (current_num_edges - prev_num_edges) / (prev_num_edges + 1.0f);
if (verbose)
std::cout << "matched sum: " << current_num_edges << ", rel_increase: " << rel_increase <<"\n";
prev_num_edges = current_num_edges;
if (!still_running[0] || rel_increase < 0.1)
break;
}
if (verbose)
{
std::cout << "# vertices = " << A.rows() << "\n";
std::cout << "# matched edges = " << prev_num_edges / 2 << " / "<< A.nnz() / 2 << "\n";
}
return {node_mapping, prev_num_edges};
}