-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy pathRebuildIndexTask.cpp
258 lines (231 loc) · 10.8 KB
/
RebuildIndexTask.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License.
*/
#include "storage/admin/RebuildIndexTask.h"
#include "common/utils/OperationKeyUtils.h"
#include "kvstore/Common.h"
#include "storage/StorageFlags.h"
namespace nebula {
namespace storage {
// Number of entries reserved up front in a BatchHolder when replaying operation logs (4096).
const int32_t kReserveNum = 4 * 1024;
// Constructs the task on top of AdminTask and logs the per-subtask rate limit in effect.
RebuildIndexTask::RebuildIndexTask(StorageEnv* env, TaskContext&& ctx)
    : AdminTask(env, std::move(ctx)) {
  // The overall rebuild rate is FLAGS_rebuild_index_part_rate_limit multiplied by the sub task
  // concurrency. With the default configuration in a 3 replica cluster, the send rate is 512Kb
  // per partition. Leaders are spread evenly across the cluster, so seen globally both the send
  // and the receive traffic are 1Mb (512 * 2 peers). Multiplied by the sub task concurrency this
  // adds up to 10Mb of send/recv traffic, which is non-trivial.
  const auto& perSubTaskLimit = FLAGS_rebuild_index_part_rate_limit;
  LOG(INFO) << "Rebuild index task is rate limited to " << perSubTaskLimit
            << " for each subtask by default";
}
// Generates the sub tasks of this rebuild job: one per partition listed in the task parameters.
//
// The indexes to rebuild are taken from the task specific parameters, each entry being parsed
// as an IndexID. When no such parameter is supplied, every index returned by getIndexes() for
// the space is rebuilt.
//
// Returns
//   - E_INDEX_NOT_FOUND when an index lookup fails,
//   - the SUCCEEDED code (and no sub task) when there is no index to rebuild,
//   - E_REBUILD_INDEX_FAILED when some part of this space is registered in the rebuild guard
//     with a state other than FINISHED,
//   - the list of sub tasks otherwise. Every part is registered as STARTING in the guard.
ErrorOr<nebula::cpp2::ErrorCode, std::vector<AdminSubTask>> RebuildIndexTask::genSubTasks() {
  CHECK_NOTNULL(env_->kvstore_);
  space_ = *ctx_.parameters_.space_id_ref();
  auto parts = *ctx_.parameters_.parts_ref();

  // Resolve the set of indexes this job has to rebuild.
  IndexItems items;
  const bool hasExplicitIndexes = ctx_.parameters_.task_specific_paras_ref().has_value() &&
                                  !(*ctx_.parameters_.task_specific_paras_ref()).empty();
  if (hasExplicitIndexes) {
    for (const auto& idText : *ctx_.parameters_.task_specific_paras_ref()) {
      auto indexID = folly::to<IndexID>(idText);
      auto lookup = getIndex(space_, indexID);
      if (!lookup.ok()) {
        LOG(ERROR) << "Index not found: " << indexID;
        return nebula::cpp2::ErrorCode::E_INDEX_NOT_FOUND;
      }
      items.emplace_back(lookup.value());
    }
  } else {
    auto allIndexes = getIndexes(space_);
    if (!allIndexes.ok()) {
      LOG(ERROR) << "Indexes not found";
      return nebula::cpp2::ErrorCode::E_INDEX_NOT_FOUND;
    }
    items = std::move(allIndexes).value();
  }

  if (items.empty()) {
    LOG(ERROR) << "Index is empty";
    return nebula::cpp2::ErrorCode::SUCCEEDED;
  }

  // Refuse to start while any part of this space still has an unfinished rebuild registered.
  for (auto entry = env_->rebuildIndexGuard_->cbegin(); entry != env_->rebuildIndexGuard_->cend();
       ++entry) {
    if (std::get<0>(entry->first) != space_) {
      continue;
    }
    if (entry->second == IndexState::FINISHED) {
      continue;
    }
    LOG(ERROR) << "This space is building index";
    return nebula::cpp2::ErrorCode::E_REBUILD_INDEX_FAILED;
  }

  std::vector<AdminSubTask> tasks;
  for (const auto& part : parts) {
    env_->rebuildIndexGuard_->insert_or_assign(std::make_tuple(space_, part), IndexState::STARTING);
    // Each sub task owns its own copy of the space id, the part id and the index list.
    std::function<nebula::cpp2::ErrorCode()> task = [this, space = space_, part, items]() {
      return invoke(space, part, items);
    };
    tasks.emplace_back(std::move(task));
  }
  return tasks;
}
// Sub task body: rebuilds the given indexes for a single partition.
//
// Steps, in order:
//   1. remove operation logs left over for the part,
//   2. mark the part as BUILDING and build the index from the existing data,
//   3. replay the operation logs,
//   4. mark the part as FINISHED.
//
// Returns E_REBUILD_INDEX_FAILED if step 1 or 2 fails, E_INVALID_OPERATION if step 3 fails and
// SUCCEEDED otherwise. All sub steps of one invocation share a single rate limiter.
nebula::cpp2::ErrorCode RebuildIndexTask::invoke(GraphSpaceID space,
                                                 PartitionID part,
                                                 const IndexItems& items) {
  const auto kOk = nebula::cpp2::ErrorCode::SUCCEEDED;
  auto rateLimiter = std::make_unique<kvstore::RateLimiter>();

  // TaskManager will make sure that there won't be concurrent invoke of a given part
  const auto cleanupCode = removeLegacyLogs(space, part);
  if (cleanupCode != kOk) {
    LOG(ERROR) << "Remove legacy logs at part: " << part << " failed";
    return nebula::cpp2::ErrorCode::E_REBUILD_INDEX_FAILED;
  }
  VLOG(1) << "Remove legacy logs at part: " << part << " successful";

  // todo(doodle): there is a potential bug here: we had better lock the part first and only
  // then switch to BUILDING, otherwise in the worst case some data will not be indexed.
  env_->rebuildIndexGuard_->assign(std::make_tuple(space, part), IndexState::BUILDING);

  LOG(INFO) << "Start building index";
  const auto buildCode = buildIndexGlobal(space, part, items, rateLimiter.get());
  if (buildCode != kOk) {
    LOG(ERROR) << "Building index failed";
    return nebula::cpp2::ErrorCode::E_REBUILD_INDEX_FAILED;
  }
  LOG(INFO) << folly::sformat("Building index successful, space={}, part={}", space, part);

  LOG(INFO) << folly::sformat("Processing operation logs, space={}, part={}", space, part);
  const auto replayCode = buildIndexOnOperations(space, part, rateLimiter.get());
  if (replayCode != kOk) {
    LOG(ERROR) << folly::sformat(
        "Building index with operation logs failed, space={}, part={}", space, part);
    return nebula::cpp2::ErrorCode::E_INVALID_OPERATION;
  }

  env_->rebuildIndexGuard_->assign(std::make_tuple(space, part), IndexState::FINISHED);
  LOG(INFO) << folly::sformat("RebuildIndexTask Finished, space={}, part={}", space, part);
  return replayCode;
}
// Replays the operation logs of a partition and deletes each log record once it is replayed.
//
// Every pass scans all keys under the part's operation prefix, turns each record into a put
// (modify operation) or a remove (delete operation), and writes the result through
// writeOperation(). Passes repeat until a pass ends with a batch no larger than
// FLAGS_rebuild_index_batch_size; at that point the part is switched from BUILDING to LOCKED,
// the function waits until env_->onFlyingRequest_ reads 0, and one more pass is run before
// leaving the loop.
//
// Returns SUCCEEDED immediately if the task was canceled, the kvstore / writeOperation error
// code on failure, E_INVALID_OPERATION for a record that is neither a modify nor a delete
// operation, and SUCCEEDED once the loop exits.
nebula::cpp2::ErrorCode RebuildIndexTask::buildIndexOnOperations(
GraphSpaceID space, PartitionID part, kvstore::RateLimiter* rateLimiter) {
// The cancel flag is only checked here, before the first pass.
if (canceled_) {
LOG(INFO) << folly::sformat("Rebuild index canceled, space={}, part={}", space, part);
return nebula::cpp2::ErrorCode::SUCCEEDED;
}
while (true) {
// Open a fresh iterator over the part's operation logs for this pass.
std::unique_ptr<kvstore::KVIterator> operationIter;
auto operationPrefix = OperationKeyUtils::operationPrefix(part);
auto operationRet = env_->kvstore_->prefix(space, part, operationPrefix, &operationIter);
if (operationRet != nebula::cpp2::ErrorCode::SUCCEEDED) {
LOG(ERROR) << "Processing Part " << part << " Failed";
return operationRet;
}
std::unique_ptr<kvstore::BatchHolder> batchHolder = std::make_unique<kvstore::BatchHolder>();
batchHolder->reserve(kReserveNum);
while (operationIter->valid()) {
auto opKey = operationIter->key();
auto opVal = operationIter->val();
// Replay the operation record.
if (OperationKeyUtils::isModifyOperation(opKey)) {
// Modify: the key to write is extracted from the operation key, the value is the record value.
VLOG(3) << "Processing Modify Operation " << opKey;
auto key = OperationKeyUtils::getOperationKey(opKey);
batchHolder->put(std::move(key), opVal.str());
} else if (OperationKeyUtils::isDeleteOperation(opKey)) {
// Delete: the record value holds the key to remove.
VLOG(3) << "Processing Delete Operation " << opVal;
batchHolder->remove(opVal.str());
} else {
LOG(ERROR) << "Unknow Operation Type";
return nebula::cpp2::ErrorCode::E_INVALID_OPERATION;
}
// The operation record itself is removed in the same batch that replays it.
batchHolder->remove(opKey.str());
// Flush once the batch grows beyond the configured batch size.
// NOTE(review): batchHolder is not reset in this function after the flush; whether
// writeOperation()/getBatch() empties it is not visible here - confirm that already written
// entries are not sent again by later flushes.
if (batchHolder->size() > FLAGS_rebuild_index_batch_size) {
auto ret = writeOperation(space, part, batchHolder.get(), rateLimiter);
if (nebula::cpp2::ErrorCode::SUCCEEDED != ret) {
LOG(ERROR) << "Write Operation Failed";
return ret;
}
}
operationIter->next();
}
// Write whatever the batch holds at the end of the scan.
auto ret = writeOperation(space, part, batchHolder.get(), rateLimiter);
if (nebula::cpp2::ErrorCode::SUCCEEDED != ret) {
LOG(ERROR) << "Write Operation Failed";
return ret;
}
// When the processed operation size does not exceed the batch size, the part is marked as
// LOCKED in StorageEnv so that writes are refused for a short period of time while the
// remaining operations are replayed. Otherwise another pass is started right away.
if (batchHolder->size() <= FLAGS_rebuild_index_batch_size) {
// Lock the part.
auto key = std::make_tuple(space, part);
auto stateIter = env_->rebuildIndexGuard_->find(key);
// If the part is still BUILDING, switch it to LOCKED and wait for the in-flight requests to
// finish, then run one more pass. In any other case (for instance the part was already
// switched to LOCKED by the previous pass) the replay is complete and the loop ends.
if (stateIter != env_->rebuildIndexGuard_->cend() &&
stateIter->second == IndexState::BUILDING) {
env_->rebuildIndexGuard_->assign(std::move(key), IndexState::LOCKED);
// Poll until all of the in-flight requests have finished; each poll is followed by a
// 100 microsecond sleep.
int32_t currentRequestNum;
do {
currentRequestNum = env_->onFlyingRequest_.load();
VLOG(3) << "On Flying Request: " << currentRequestNum;
usleep(100);
} while (currentRequestNum != 0);
} else {
break;
}
}
}
return nebula::cpp2::ErrorCode::SUCCEEDED;
}
// Removes every operation log stored under the operation prefix of the given part.
//
// The removal is issued asynchronously through the kvstore and this function blocks on a baton
// until the completion callback has run.
//
// Returns SUCCEEDED when the range removal succeeded, otherwise the error code reported by the
// kvstore, so that the caller can abort the rebuild.
nebula::cpp2::ErrorCode RebuildIndexTask::removeLegacyLogs(GraphSpaceID space, PartitionID part) {
  auto operationPrefix = OperationKeyUtils::operationPrefix(part);
  folly::Baton<true, std::atomic> baton;
  auto result = nebula::cpp2::ErrorCode::SUCCEEDED;
  env_->kvstore_->asyncRemoveRange(space,
                                   part,
                                   NebulaKeyUtils::firstKey(operationPrefix, sizeof(int64_t)),
                                   NebulaKeyUtils::lastKey(operationPrefix, sizeof(int64_t)),
                                   [&result, &baton](nebula::cpp2::ErrorCode code) {
                                     if (code != nebula::cpp2::ErrorCode::SUCCEEDED) {
                                       LOG(ERROR) << "Modify the index failed";
                                       result = code;
                                     }
                                     baton.post();
                                   });
  // The callback captures locals by reference, so wait for it before leaving this scope.
  baton.wait();
  // Propagate the code captured by the callback instead of always reporting success.
  return result;
}
// Writes a batch of key/value pairs to the given part and waits for the write to complete.
//
// Before the write is issued, batchSize is charged against the rate limiter, using
// FLAGS_rebuild_index_part_rate_limit both as the rate and as the burst size.
//
// Returns SUCCEEDED, or the error code reported by the kvstore callback.
nebula::cpp2::ErrorCode RebuildIndexTask::writeData(GraphSpaceID space,
                                                    PartitionID part,
                                                    std::vector<kvstore::KV> data,
                                                    size_t batchSize,
                                                    kvstore::RateLimiter* rateLimiter) {
  const auto limit = static_cast<double>(FLAGS_rebuild_index_part_rate_limit);
  rateLimiter->consume(static_cast<double>(batchSize),  // toConsume
                       limit,                           // rate
                       limit);                          // burstSize

  folly::Baton<true, std::atomic> done;
  nebula::cpp2::ErrorCode status = nebula::cpp2::ErrorCode::SUCCEEDED;
  // Runs on completion of the write; records a failure code, then releases the waiter below.
  auto onFinished = [&status, &done](nebula::cpp2::ErrorCode code) {
    if (code != nebula::cpp2::ErrorCode::SUCCEEDED) {
      status = code;
    }
    done.post();
  };
  env_->kvstore_->asyncMultiPut(space, part, std::move(data), std::move(onFinished));
  done.wait();
  return status;
}
// Encodes the content of batchHolder and appends it to the given part as one batch, then waits
// for the append to complete.
//
// Before the append is issued, batchHolder->size() is charged against the rate limiter, using
// FLAGS_rebuild_index_part_rate_limit both as the rate and as the burst size.
//
// Returns SUCCEEDED, or the error code reported by the kvstore callback.
nebula::cpp2::ErrorCode RebuildIndexTask::writeOperation(GraphSpaceID space,
                                                         PartitionID part,
                                                         kvstore::BatchHolder* batchHolder,
                                                         kvstore::RateLimiter* rateLimiter) {
  // The batch is encoded before its size is read, in the same order as before.
  auto payload = encodeBatchValue(batchHolder->getBatch());

  const auto limit = static_cast<double>(FLAGS_rebuild_index_part_rate_limit);
  rateLimiter->consume(static_cast<double>(batchHolder->size()),  // toConsume
                       limit,                                     // rate
                       limit);                                    // burstSize

  folly::Baton<true, std::atomic> done;
  nebula::cpp2::ErrorCode status = nebula::cpp2::ErrorCode::SUCCEEDED;
  // Runs on completion of the append; records a failure code, then releases the waiter below.
  auto onFinished = [&status, &done](nebula::cpp2::ErrorCode code) {
    if (code != nebula::cpp2::ErrorCode::SUCCEEDED) {
      status = code;
    }
    done.post();
  };
  env_->kvstore_->asyncAppendBatch(space, part, std::move(payload), std::move(onFinished));
  done.wait();
  return status;
}
} // namespace storage
} // namespace nebula