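// test.cpp: benchmarking two strategies for consecutive row-wise extraction
// from a simulated column-sparse matrix - a linear scan over per-column
// cached pointers versus a priority queue over the columns' next row indices.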
#include "CLI/App.hpp"
#include "CLI/Formatter.hpp"
#include "CLI/Config.hpp"
#include "nanobench.h"
#include <chrono>
#include <vector>
#include <queue>
#include <random>

struct Linear {
private:
    const std::vector<std::vector<int> >& indices;
    int max_index;

private:
    // The cached position of the pointer for each primary element.
    // Specifically, 'indices[i][cached_indptrs[i]]' is the lower bound for 'last_request' in the primary element 'i'.
    std::vector<size_t> cached_indptrs;

    // This vector contains the cached index being pointed to by 'cached_indptrs'.
    // We store this here as it is more cache-friendly than doing a look-up into 'indices' every time.
    std::vector<int> cached_indices;

    // The closest value in 'cached_indices' to 'last_request', used to check whether we can short-circuit the iteration.
    // This is the minimum of the values in 'cached_indices'.
    int closest_cached_index = 0;

    // The first position in 'cached_indices' that is equal to 'closest_cached_index',
    // i.e., 'cached_indices[closest_cached_index_position] == closest_cached_index'.
    size_t closest_cached_index_position = 0;

    // The last requested index on the secondary dimension. (Write-only in
    // this simplified benchmark; a full implementation would consult it to
    // handle non-consecutive jumps.)
    int last_request = 0;

public:
    Linear(const std::vector<std::vector<int> >& idx, int mi) :
        indices(idx),
        max_index(mi),
        cached_indptrs(indices.size()),
        cached_indices(indices.size())
    {
        // Each cache entry starts at the first stored index of its primary
        // element, or at 'max_index' if that element is empty.
        for (size_t p = 0, pend = indices.size(); p < pend; ++p) {
            const auto& curi = indices[p];
            cached_indices[p] = (curi.empty() ? max_index : curi.front());
        }
        if (!cached_indices.empty()) {
            auto closestIt = std::min_element(cached_indices.begin(), cached_indices.end());
            closest_cached_index_position = closestIt - cached_indices.begin();
            closest_cached_index = *closestIt;
        }
    }
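
    // Hypothetical worked example: if 'indices' is {{1, 5}, {0, 2}} and
    // 'max_index' is 6, construction sets 'cached_indices' to {1, 0}, so
    // 'closest_cached_index' is 0 (at position 1). A subsequent request for
    // secondary index 0 then hits primary element 1 only, as element 0's
    // cached index (1) is already past the request.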
private:
    template<class Store_>
    void search_above(int secondary, int primary, Store_ store) {
        // Skip if the curdex (corresponding to curptr) is already higher
        // than secondary; we only need to do more work if the request is
        // greater than or equal to the stored index. This also catches
        // cases where we're at the end of the dimension, as curdex is set
        // to max_index.
        auto& curdex = cached_indices[primary];
        if (curdex > secondary) {
            return;
        }

        auto& curptr = cached_indptrs[primary];
        if (curdex == secondary) {
            store(primary, curptr);
            return;
        }

        // Having a peek at the index of the next non-zero element; the
        // requested index should be equal to or below this, as would be the
        // case for consecutive accesses. An actual implementation would have
        // to account for non-consecutive jumps, but we'll keep things simple
        // here for a comparison to an equally simple queue implementation.
        ++curptr;
        const auto& curi = indices[primary];
        if (curptr == curi.size()) {
            curdex = max_index;
            return;
        }

        curdex = *(curi.begin() + curptr);
        if (curdex == secondary) {
            store(primary, curptr);
            return;
        }
    }
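
    // Sketch of the consecutive-access pattern this relies on, with
    // hypothetical values: given 'indices[primary]' = {3, 4, 7}, requests
    // 0-2 return immediately (the cached index 3 is too high), request 3
    // stores a hit without advancing, request 4 advances the pointer and
    // stores another hit, request 5 advances the pointer to 7 without a
    // hit, and request 6 returns immediately until request 7 hits again.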
public:
    template<class Store_>
    void search_simple(int secondary, Store_ store) {
        for (size_t p = 0, pend = indices.size(); p < pend; ++p) {
            search_above(secondary, p, store);
        }
        last_request = secondary;
    }

    template<class Store_>
    void search_shortcircuit(int secondary, Store_ store) {
        // If the request lies below every cached index, no primary element
        // can produce a hit, so we can skip the scan entirely.
        if (secondary < closest_cached_index) {
            last_request = secondary;
            return;
        }

        bool found = false;
        for (size_t p = 0, pend = indices.size(); p < pend; ++p) {
            search_above(secondary, p, [&](int i, size_t s) {
                store(i, s);
                found = true;
            });
        }

        // On a hit, the minimum cached index is the request itself;
        // otherwise, recompute it from scratch.
        if (found) {
            closest_cached_index = secondary;
        } else {
            closest_cached_index = *(std::min_element(cached_indices.begin(), cached_indices.end()));
        }
        last_request = secondary;
    }
};

struct Pqueue {
private:
    const std::vector<std::vector<int> >& indices;

    // Min-heap of (secondary index, primary element) pairs, ordered by the
    // secondary index of each element's next non-zero entry.
    typedef std::pair<int, int> HeapElement;
    std::priority_queue<HeapElement, std::vector<HeapElement>, std::greater<HeapElement> > next_heap;

    std::vector<int> hits, tmp_hits;
    std::vector<size_t> state;

public:
    Pqueue(const std::vector<std::vector<int> >& idx) : indices(idx), hits(indices.size()), state(indices.size()) {
        // Force everything to be re-searched on initialization: each 'state'
        // entry wraps around to the maximum size_t value, so the '++state[x]'
        // in the first search() call brings it back to position 0.
        for (size_t c = 0, cend = indices.size(); c < cend; ++c) {
            hits[c] = c;
            --state[c];
        }
    }
public:
    template<class Store_>
    void search(int secondary, Store_ store) {
        tmp_hits.swap(hits);
        hits.clear();

        // Refilling the indices we popped out in the last round. This gives
        // us an opportunity to check whether they're equal to the current
        // 'secondary' (and thus elide an insertion into the queue).
        for (auto x : tmp_hits) {
            ++state[x];
            const auto& curx = indices[x];
            if (state[x] < curx.size()) {
                int current = curx[state[x]];
                if (current == secondary) {
                    hits.push_back(x);
                } else {
                    next_heap.emplace(current, x);
                }
            }
        }

        // Finding all the queue elements equal to our current position.
        // No need to do anything fancy when we're just incrementing;
        // the top of the heap is always going to be '>= secondary'.
        while (!next_heap.empty()) {
            auto current_secondary = next_heap.top().first;
            if (current_secondary > secondary) {
                break;
            }
            auto current_primary_index = next_heap.top().second;
            next_heap.pop();
            hits.push_back(current_primary_index);
        }

        /*
         * We're going to paint the priority queue in the best possible light
         * here by skipping the sort step, which is technically necessary to
         * have 1:1 feature parity with the linear methods.
         */
        // std::sort(hits.begin(), hits.end());

        for (auto x : hits) {
            store(x, state[x]);
        }
    }
};
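
// Rough cost intuition (standard reasoning, not measured here): each linear
// search touches all P primary elements (columns), i.e., O(P) work per request
// even when few columns contain the requested row, whereas the queue only
// spends O(log P) per refill/pop on the columns that actually advance.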

int main(int argc, char* argv[]) {
    CLI::App app{"Sparse priority queue testing"};
    double density;
    app.add_option("-d,--density", density, "Density of the sparse matrix")->default_val(0.1);
    int nr;
    app.add_option("-r,--nrow", nr, "Number of rows")->default_val(10000);
    int nc;
    app.add_option("-c,--ncol", nc, "Number of columns")->default_val(50000);
    CLI11_PARSE(app, argc, argv);

    std::cout << "Testing a " << nr << " x " << nc << " matrix with a density of " << density << std::endl;

    // Simulating a column-sparse matrix, albeit not very efficiently: each
    // column holds a sorted vector of the row indices of its non-zero entries.
    std::vector<std::vector<int> > i(nc);
    std::mt19937_64 generator(1234567);
    std::uniform_real_distribution<double> distu;
    for (int c = 0; c < nc; ++c) {
        auto& curi = i[c];
        for (int r = 0; r < nr; ++r) {
            if (distu(generator) <= density) {
                curi.push_back(r);
            }
        }
    }
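
    // With the defaults, this yields roughly nr * nc * density = 50 million
    // non-zero entries in total.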

    // Computing the reference sum of hits for later sanity checks.
    size_t expected = 0;
    {
        Linear linear(i, nr);
        for (int r = 0; r < nr; ++r) {
            linear.search_simple(r, [&](int, size_t) { ++expected; });
        }
        std::cout << "Expecting a sum of " << expected << std::endl;
    }

    // Doing the linear iteration with simple caching.
    ankerl::nanobench::Bench().run("linear simple", [&](){
        Linear linear(i, nr);
        size_t sum = 0;
        for (int r = 0; r < nr; ++r) {
            linear.search_simple(r, [&](int, size_t) { ++sum; });
        }
        if (sum != expected) {
            std::cerr << "WARNING: different result from linear access (" << sum << ")" << std::endl;
        }
    });

    // Doing the linear iteration with short-circuiting on the closest cached index.
    ankerl::nanobench::Bench().run("linear shortcircuit", [&](){
        Linear linear(i, nr);
        size_t sum = 0;
        for (int r = 0; r < nr; ++r) {
            linear.search_shortcircuit(r, [&](int, size_t) { ++sum; });
        }
        if (sum != expected) {
            std::cerr << "WARNING: different result from linear shortcircuit access (" << sum << ")" << std::endl;
        }
    });

    // Comparing to a priority queue.
    ankerl::nanobench::Bench().run("queue", [&](){
        Pqueue pqueue(i);
        size_t sum = 0;
        for (int r = 0; r < nr; ++r) {
            pqueue.search(r, [&](int, size_t) { ++sum; });
        }
        if (sum != expected) {
            std::cerr << "WARNING: different result from queue access (" << sum << ")" << std::endl;
        }
    });

    return 0;
}
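
// A possible build line, assuming the CLI11 and nanobench headers are on the
// include path (the paths below are placeholders; nanobench also expects
// ANKERL_NANOBENCH_IMPLEMENT to be defined in exactly one translation unit,
// e.g., a separate nanobench.cpp):
//   g++ -O2 -std=c++17 -I/path/to/CLI11/include -I/path/to/nanobench/include test.cpp -o test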