Skip to content

Commit

Permalink
[Enhancement](merge-on-write) add correctness check for the calculati…
Browse files Browse the repository at this point in the history
…on of delete bitmap (#22282)

Currently, for merge-on-write unique table, the delete bitmap of a rowset will be calculated during flush phase, commit phase and publish phase. In this PR, we add a special mark in every rowset considered when we calculate delete bitmap in these three phases. Before we finally merge the delete bitmap to the table meta's delete bitmap, we will check if all the rowsets contain the special mark to check if we have considered all the rowsets during the above three phases.
Because the executor can not fail in publish phase if the coordinator have received successful commits info from all the executors, we just print logs if this correctness check failed rather than report a failure.
  • Loading branch information
bobhan1 authored and xiaokang committed Aug 11, 2023
1 parent 78ed14a commit ae6509c
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 0 deletions.
5 changes: 5 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,11 @@ DEFINE_Bool(enable_hdfs_hedged_read, "false");
DEFINE_Int32(hdfs_hedged_read_thread_num, "128");
DEFINE_Int32(hdfs_hedged_read_threshold_time, "500");

DEFINE_mBool(enable_merge_on_write_correctness_check, "true");

// The secure path with user files, used in the `local` table function.
DEFINE_mString(user_files_secure_path, "${DORIS_HOME}");

#ifdef BE_TEST
// test s3
DEFINE_String(test_s3_resource, "resource");
Expand Down
5 changes: 5 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,11 @@ DECLARE_Int32(hdfs_hedged_read_thread_num);
// Maybe overwritten by the value specified when creating catalog
DECLARE_Int32(hdfs_hedged_read_threshold_time);

DECLARE_mBool(enable_merge_on_write_correctness_check);

// The secure path with user files, used in the `local` table function.
DECLARE_mString(user_files_secure_path);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
5 changes: 5 additions & 0 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,11 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block*
}
CHECK(use_default_or_null_flag.size() == num_rows);

if (config::enable_merge_on_write_correctness_check) {
_tablet->add_sentinel_mark_to_delete_bitmap(_mow_context->delete_bitmap,
_mow_context->rowset_ids);
}

// read and fill block
auto mutable_full_columns = full_block.mutate_columns();
RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag,
Expand Down
58 changes: 58 additions & 0 deletions be/src/olap/tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2980,6 +2980,17 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
}
DCHECK_EQ(total, row_id) << "segment total rows: " << total << " row_id:" << row_id;

if (config::enable_merge_on_write_correctness_check) {
RowsetIdUnorderedSet rowsetids;
for (const auto& rowset : specified_rowsets) {
rowsetids.emplace(rowset->rowset_id());
LOG(INFO) << "[tabletID:" << tablet_id() << "]"
<< "[add_sentinel_mark_to_delete_bitmap][end_version:" << end_version << "]"
<< "add:" << rowset->rowset_id();
}
add_sentinel_mark_to_delete_bitmap(delete_bitmap, rowsetids);
}

if (pos > 0) {
RETURN_IF_ERROR(generate_new_block_for_partial_update(
rowset_schema, read_plan_ori, read_plan_update, rsid_to_rowset, &block));
Expand Down Expand Up @@ -3225,6 +3236,12 @@ Status Tablet::update_delete_bitmap_without_lock(const RowsetSharedPtr& rowset)
<< ", rowset_ids: " << cur_rowset_ids.size() << ", cur max_version: " << cur_version
<< ", transaction_id: " << -1 << ", cost: " << watch.get_elapse_time_us()
<< "(us), total rows: " << total_rows;
if (config::enable_merge_on_write_correctness_check) {
// check if all the rowset has ROWSET_SENTINEL_MARK
RETURN_IF_ERROR(
_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, cur_rowset_ids));
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}
for (auto iter = delete_bitmap->delete_bitmap.begin();
iter != delete_bitmap->delete_bitmap.end(); ++iter) {
_tablet_meta->delete_bitmap().merge(
Expand Down Expand Up @@ -3322,6 +3339,14 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset,
<< ", cur max_version: " << cur_version << ", transaction_id: " << txn_id
<< ", cost: " << watch.get_elapse_time_us() << "(us), total rows: " << total_rows;

if (config::enable_merge_on_write_correctness_check && rowset->num_rows() != 0) {
// only do correctness check if the rowset has at least one row written
// check if all the rowset has ROWSET_SENTINEL_MARK
RETURN_IF_ERROR(
_check_delete_bitmap_correctness(delete_bitmap, cur_version - 1, cur_rowset_ids));
_remove_sentinel_mark_from_delete_bitmap(delete_bitmap);
}

// update version without write lock, compaction and publish_txn
// will update delete bitmap, handle compaction with _rowset_update_lock
// and publish_txn runs sequential so no need to lock here
Expand Down Expand Up @@ -3655,4 +3680,37 @@ Status Tablet::calc_delete_bitmap_between_segments(
return Status::OK();
}

void Tablet::add_sentinel_mark_to_delete_bitmap(DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowsetids) {
for (const auto& rowsetid : rowsetids) {
delete_bitmap->add({rowsetid, DeleteBitmap::INVALID_SEGMENT_ID, 0},
DeleteBitmap::ROWSET_SENTINEL_MARK);
}
}

void Tablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap) {
for (auto it = delete_bitmap->delete_bitmap.begin(), end = delete_bitmap->delete_bitmap.end();
it != end;) {
if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) {
it = delete_bitmap->delete_bitmap.erase(it);
} else {
++it;
}
}
}

Status Tablet::_check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
const RowsetIdUnorderedSet& rowset_ids) const {
for (const auto& rowsetid : rowset_ids) {
if (!delete_bitmap->delete_bitmap.contains(
{rowsetid, DeleteBitmap::INVALID_SEGMENT_ID, 0})) {
LOG(WARNING) << "check delete bitmap correctness failed, can't find sentinel mark in "
"rowset with RowsetId:"
<< rowsetid << ",max_version:" << max_version;
DCHECK(false) << "check delete bitmap correctness failed!";
return Status::OK();
}
}
return Status::OK();
}
} // namespace doris
6 changes: 6 additions & 0 deletions be/src/olap/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,8 @@ class Tablet : public BaseTablet {
int64_t binlog_max_bytes() const { return _tablet_meta->binlog_config().max_bytes(); }

void set_binlog_config(BinlogConfig binlog_config);
void add_sentinel_mark_to_delete_bitmap(DeleteBitmapPtr delete_bitmap,
const RowsetIdUnorderedSet& rowsetids);

private:
Status _init_once_action();
Expand Down Expand Up @@ -597,6 +599,10 @@ class Tablet : public BaseTablet {
// end cooldown functions
////////////////////////////////////////////////////////////////////////////

void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap);
Status _check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
const RowsetIdUnorderedSet& rowset_ids) const;

public:
static const int64_t K_INVALID_CUMULATIVE_POINT = -1;

Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/tablet_meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <atomic>
#include <cstddef>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
Expand Down Expand Up @@ -335,6 +336,9 @@ class DeleteBitmap {
using Version = uint64_t;
using BitmapKey = std::tuple<RowsetId, SegmentId, Version>;
std::map<BitmapKey, roaring::Roaring> delete_bitmap; // Ordered map
constexpr static inline uint32_t INVALID_SEGMENT_ID = std::numeric_limits<uint32_t>::max() - 1;
constexpr static inline uint32_t ROWSET_SENTINEL_MARK =
std::numeric_limits<uint32_t>::max() - 1;

/**
*
Expand Down

0 comments on commit ae6509c

Please sign in to comment.