Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix blobstore truncate size may not right #5127

Merged
merged 19 commits into from
Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 42 additions & 15 deletions dbms/src/Storages/Page/V3/BlobStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -851,8 +851,8 @@ struct BlobStoreGCInfo
toTypeString("Read-Only Blob", 0),
toTypeString("No GC Blob", 1),
toTypeString("Full GC Blob", 2),
toTypeString("Truncated Blob", 3),
toTypeString("Big Blob", 4));
toTypeString("Big Blob", 3),
toTypeTruncateString("Truncated Blob"));
}

void appendToReadOnlyBlob(const BlobFileId blob_id, double valid_rate)
Expand All @@ -870,23 +870,24 @@ struct BlobStoreGCInfo
blob_gc_info[2].emplace_back(std::make_pair(blob_id, valid_rate));
}

void appendToTruncatedBlob(const BlobFileId blob_id, double valid_rate)
void appendToBigBlob(const BlobFileId blob_id, double valid_rate)
{
blob_gc_info[3].emplace_back(std::make_pair(blob_id, valid_rate));
}

void appendToBigBlob(const BlobFileId blob_id, double valid_rate)
void appendToTruncatedBlob(const BlobFileId blob_id, UInt64 origin_size, UInt64 truncated_size, double valid_rate)
{
blob_gc_info[4].emplace_back(std::make_pair(blob_id, valid_rate));
blob_gc_truncate_info.emplace_back(std::make_tuple(blob_id, origin_size, truncated_size, valid_rate));
}

private:
// 1. read only blob
// 2. no need gc blob
// 3. full gc blob
// 4. need truncate blob
// 5. big blob
std::vector<std::pair<BlobFileId, double>> blob_gc_info[5];
// 4. big blob
std::vector<std::pair<BlobFileId, double>> blob_gc_info[4];

std::vector<std::tuple<BlobFileId, UInt64, UInt64, double>> blob_gc_truncate_info;

String toTypeString(const std::string_view prefix, const size_t index) const
{
Expand All @@ -911,6 +912,32 @@ struct BlobStoreGCInfo

return fmt_buf.toString();
}

String toTypeTruncateString(const std::string_view prefix) const
{
FmtBuffer fmt_buf;
if (blob_gc_truncate_info.empty())
{
fmt_buf.fmtAppend("{}: [null]", prefix);
}
else
{
fmt_buf.fmtAppend("{}: [", prefix);
fmt_buf.joinStr(
blob_gc_truncate_info.begin(),
blob_gc_truncate_info.end(),
[](const auto arg, FmtBuffer & fb) {
fb.fmtAppend("{} origin: {} truncate: {} rate: {:.2f}", //
std::get<0>(arg), // blob id
std::get<1>(arg), // origin size
std::get<2>(arg), // truncated size
std::get<3>(arg)); // valid rate
},
", ");
fmt_buf.append("]");
}
return fmt_buf.toString();
}
};

std::vector<BlobFileId> BlobStore::getGCStats()
Expand Down Expand Up @@ -953,7 +980,7 @@ std::vector<BlobFileId> BlobStore::getGCStats()
}

auto lock = stat->lock();
auto right_margin = stat->smap->getRightMargin();
auto right_margin = stat->smap->getUsedBoundary();

// Avoid divide by zero
if (right_margin == 0)
Expand All @@ -966,14 +993,13 @@ std::vector<BlobFileId> BlobStore::getGCStats()
stat->sm_valid_rate));
}

LOG_FMT_TRACE(log, "Current blob is empty [blob_id={}, total size(all invalid)={}] [valid_rate={}].", stat->id, stat->sm_total_size, stat->sm_valid_rate);

// If current blob empty, the size of in disk blob may not empty
// So we need truncate current blob, and let it be reused.
auto blobfile = getBlobFile(stat->id);
LOG_FMT_TRACE(log, "Truncate empty blob file [blob_id={}] to 0.", stat->id);
LOG_FMT_INFO(log, "Current blob file is empty, truncated to zero [blob_id={}] [total_size={}] [valid_rate={}]", stat->id, stat->sm_total_size, stat->sm_valid_rate);
JaySon-Huang marked this conversation as resolved.
Show resolved Hide resolved
blobfile->truncate(right_margin);
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate);
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate);
stat->sm_total_size = right_margin;
continue;
}

Expand Down Expand Up @@ -1012,11 +1038,12 @@ std::vector<BlobFileId> BlobStore::getGCStats()
if (right_margin != stat->sm_total_size)
{
auto blobfile = getBlobFile(stat->id);
LOG_FMT_TRACE(log, "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", stat->id, stat->sm_total_size, right_margin);
LOG_FMT_INFO(log, "Truncate blob file [blob_id={}] [origin size={}] [truncated size={}]", stat->id, stat->sm_total_size, right_margin);
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
blobfile->truncate(right_margin);
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_total_size, right_margin, stat->sm_valid_rate);

stat->sm_total_size = right_margin;
stat->sm_valid_rate = stat->sm_valid_size * 1.0 / stat->sm_total_size;
blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/Page/V3/PageDirectory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,7 @@ bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const W
// `being_ref_count` by the function `createSnapshot()`.
assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` return true
auto log_num = files_snap.persisted_log_files.rbegin()->log_num;
auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num);
auto identifier = fmt::format("{}.dump_{}", wal->name(), log_num);
auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter);
PageDirectoryFactory factory;
// we just use the `collapsed_dir` to dump edit of the snapshot, should never call functions like `apply` that
Expand Down
5 changes: 5 additions & 0 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ SpaceMap::SpaceMap(UInt64 start_, UInt64 end_, SpaceMapType type_)
, end(end_)
, log(&Poco::Logger::get("SpaceMap"))
{
if (start != 0)
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
{
throw Exception(fmt::format("SpaceMap not support start is not 0. [start={}], [end={}]", start, end),
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
ErrorCodes::NOT_IMPLEMENTED);
}
}

} // namespace PS::V3
Expand Down
6 changes: 4 additions & 2 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,11 @@ class SpaceMap
virtual std::tuple<UInt64, UInt64, bool> searchInsertOffset(size_t size) = 0;

/**
* Get the offset of the last free block. `[margin_offset, +∞)` is not used at all.
* Get the used boundary of current SpaceMap.
* If last node is [xxx, end]. Then `[margin_offset, +∞)` is not used at all. Return the `start` of last node.
* If last node is not [xxx, end]. Then right margin must be `end`.
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
*/
virtual UInt64 getRightMargin() = 0;
virtual UInt64 getUsedBoundary() = 0;

/**
* Get the accurate max capacity of the space map.
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/Page/V3/spacemap/SpaceMapBig.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class BigSpaceMap
return std::make_pair(size_in_used, size_in_used);
}

UInt64 getRightMargin() override
UInt64 getUsedBoundary() override
{
return end;
}
Expand Down
30 changes: 23 additions & 7 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ static void rb_get_new_entry(struct SmapRbEntry ** entry, UInt64 start, UInt64 c
{
struct SmapRbEntry * new_entry;

new_entry = static_cast<struct SmapRbEntry *>(calloc(1, sizeof(struct SmapRbEntry)));
new_entry = static_cast<struct SmapRbEntry *>(calloc(1, sizeof(struct SmapRbEntry))); // NOLINT
if (new_entry == nullptr)
{
return;
Expand Down Expand Up @@ -115,7 +115,7 @@ inline static void rb_free_entry(struct RbPrivate * private_data, struct SmapRbE
private_data->read_index_next = nullptr;
}

free(entry);
free(entry); // NOLINT
}


Expand Down Expand Up @@ -419,7 +419,7 @@ std::shared_ptr<RBTreeSpaceMap> RBTreeSpaceMap::create(UInt64 start, UInt64 end)
{
auto ptr = std::shared_ptr<RBTreeSpaceMap>(new RBTreeSpaceMap(start, end));

ptr->rb_tree = static_cast<struct RbPrivate *>(calloc(1, sizeof(struct RbPrivate)));
ptr->rb_tree = static_cast<struct RbPrivate *>(calloc(1, sizeof(struct RbPrivate))); // NOLINT
if (ptr->rb_tree == nullptr)
{
return nullptr;
Expand All @@ -435,7 +435,7 @@ std::shared_ptr<RBTreeSpaceMap> RBTreeSpaceMap::create(UInt64 start, UInt64 end)
if (!rb_insert_entry(start, end, ptr->rb_tree, ptr->log))
{
LOG_FMT_ERROR(ptr->log, "Erorr happend, when mark all space free. [start={}] , [end={}]", start, end);
free(ptr->rb_tree);
free(ptr->rb_tree); // NOLINT
return nullptr;
}
return ptr;
Expand All @@ -451,7 +451,7 @@ static void rb_free_tree(struct rb_root * root)
next = rb_tree_next(node);
entry = node_to_entry(node);
rb_node_remove(node, root);
free(entry);
free(entry); // NOLINT
}
}

Expand All @@ -460,7 +460,7 @@ void RBTreeSpaceMap::freeSmap()
if (rb_tree)
{
rb_free_tree(&rb_tree->root);
free(rb_tree);
free(rb_tree); // NOLINT
}
}

Expand Down Expand Up @@ -734,7 +734,7 @@ std::pair<UInt64, UInt64> RBTreeSpaceMap::getSizes() const
}
}

UInt64 RBTreeSpaceMap::getRightMargin()
UInt64 RBTreeSpaceMap::getUsedBoundary()
{
struct rb_node * node = rb_tree_last(&rb_tree->root);
if (node == nullptr)
Expand All @@ -743,6 +743,22 @@ UInt64 RBTreeSpaceMap::getRightMargin()
}

auto * entry = node_to_entry(node);

// If last node is not [xxx, end]
// Then we should return `end` rather than last node start
//
// When there is a space with a span of [xxx, end],
// Then `getUsedBoundary` will return an incorrect right margin.
// ex.
// 1. Space limit is 100. So current space is [0, 100]
// 2. Mark a span {offset=90, size=10} as used, then the free range in SpaceMap is [0, 90).
// 3. without this check, `getUsedBoundary` will return 0. This is incorrect.
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
if (entry->start + entry->count != end)
{
return end;
}

// Else we should return the offset of last free node
return entry->start;
}

Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/Page/V3/spacemap/SpaceMapRBTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class RBTreeSpaceMap

std::pair<UInt64, UInt64> getSizes() const override;

UInt64 getRightMargin() override;
UInt64 getUsedBoundary() override;

protected:
RBTreeSpaceMap(UInt64 start, UInt64 end)
Expand Down
24 changes: 21 additions & 3 deletions dbms/src/Storages/Page/V3/spacemap/SpaceMapSTDMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,31 @@ class STDMapSpaceMap
}
}

UInt64 getRightMargin() override
UInt64 getUsedBoundary() override
{
if (free_map.empty())
{
return end - start;
return end;
}
return free_map.rbegin()->first;

const auto & last_node_it = free_map.rbegin();

// If last node is not [xxx, end]
// Then we should return `end` rather than last node start
//
// When there is a space with a span of [xxx, end],
// Then `getUsedBoundary` will return an incorrect right margin.
// ex.
// 1. Space limit is 100. So current space is [0, 100]
// 2. Mark a span {offset=90, size=10} as used, then the free range in SpaceMap is [0, 90).
// 3. without this check, `getUsedBoundary` will return 0. This is incorrect.
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
if (last_node_it->first + last_node_it->second != end)
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
{
return end;
}

// Else we should return the offset of last free node
return last_node_it->first;
}

bool isMarkUnused(UInt64 offset, size_t length) override
Expand Down
37 changes: 37 additions & 0 deletions dbms/src/Storages/Page/V3/tests/gtest_free_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,43 @@ TEST_P(SpaceMapTest, TestGetMaxCap)
}
}


TEST_P(SpaceMapTest, TestGetUsedBoundary)
{
jiaqizho marked this conversation as resolved.
Show resolved Hide resolved
{
auto smap = SpaceMap::createSpaceMap(test_type, 0, 100);
ASSERT_TRUE(smap->markUsed(50, 10));
ASSERT_EQ(smap->getUsedBoundary(), 60);
ASSERT_TRUE(smap->markUsed(80, 10));
ASSERT_EQ(smap->getUsedBoundary(), 90);

ASSERT_TRUE(smap->markUsed(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);
}

{
auto smap = SpaceMap::createSpaceMap(test_type, 0, 100);
ASSERT_TRUE(smap->markUsed(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);

ASSERT_TRUE(smap->markUsed(20, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);

ASSERT_TRUE(smap->markFree(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 30);

ASSERT_TRUE(smap->markUsed(90, 10));
ASSERT_EQ(smap->getUsedBoundary(), 100);
}

{
auto smap = SpaceMap::createSpaceMap(test_type, 0, 100);
ASSERT_EQ(smap->getUsedBoundary(), 0);
ASSERT_TRUE(smap->markUsed(0, 100));
ASSERT_EQ(smap->getUsedBoundary(), 100);
}
}

INSTANTIATE_TEST_CASE_P(
Type,
SpaceMapTest,
Expand Down