From c4936f26e8d549a08744d308db1cc6c3b7797790 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 25 Sep 2023 09:34:39 -0700 Subject: [PATCH] Only flush after recovery for retryable IOError (#11880) Summary: https://github.com/facebook/rocksdb/issues/11872 causes a unit test to start failing with the error message below. The cause is that the additional call to `FlushAllColumnFamilies()` in `DBImpl::ResumeImpl()` can run while DB is closing. More detailed explanation: there are two places where we call `ResumeImpl()`: 1. in `ErrorHandler::RecoverFromBGError`, for manual resume or recovery from errors like OutOfSpace through sst file manager, and 2. in `Errorhandler::RecoverFromRetryableBGIOError`, for error recovery from errors like flush failure due to retryable IOError. This is tracked by `ErrorHandler::recovery_thread_`. Here is how DB close waits for error recovery: https://github.com/facebook/rocksdb/blob/49da91ec097b4efcd8a8e4dc1b287e9f81eb4093/db/db_impl/db_impl.cc#L540-L543 `CancelErrorRecovery()` waits until `recovery_thread_` finishes and `IsRecoveryInProgress()` checks the `recovery_in_prog_` flag. The additional call to `FlushAllColumnFamilies()` in `ResumeImpl()` happens after it clears bg error and the `recovery_in_prog_` flag: https://github.com/facebook/rocksdb/blob/49da91ec097b4efcd8a8e4dc1b287e9f81eb4093/db/db_impl/db_impl.cc#L436-L463. So if `ResumeImpl()` is called in `RecoverFromBGError()`, we can have a thread running `FlushAllColumnFamilies()` while DB is closing and thought that recovery is done. The fix is to only do the additional call to `FlushAllColumnFamilies()` when doing error recovery through `Errorhandler::RecoverFromRetryableBGIOError` by setting flags in `DBRecoverContext`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11880 Test Plan: `gtest-parallel --repeat=100 --workers=4 ./error_handler_fs_test --gtest_filter="*AutoRecoverFlushError*"` reproduces the error pretty reliably. ```[==========] Running 1 test from 1 test case. [----------] Global test environment set-up. [----------] 1 test from DBErrorHandlingFSTest [ RUN ] DBErrorHandlingFSTest.AutoRecoverFlushError error_handler_fs_test: db/column_family.cc:1618: rocksdb::ColumnFamilySet::~ColumnFamilySet(): Assertion `last_ref' failed. Received signal 6 (Aborted) ... https://github.com/facebook/rocksdb/issues/10 0x00007fac4409efd6 in __GI___assert_fail (assertion=0x7fac452c0afa "last_ref", file=0x7fac452c9fb5 "db/column_family.cc", line=1618, function=0x7fac452cb950 "rocksdb::ColumnFamilySet::~ColumnFamilySet()") at assert.c:101 101 in assert.c https://github.com/facebook/rocksdb/issues/11 0x00007fac44b5324f in rocksdb::ColumnFamilySet::~ColumnFamilySet (this=0x7b5400000000) at db/column_family.cc:1618 1618 assert(last_ref); https://github.com/facebook/rocksdb/issues/12 0x00007fac44e0f047 in std::default_delete::operator() (this=0x7b5800000940, __ptr=0x7b5400000000) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85 85 delete __ptr; https://github.com/facebook/rocksdb/issues/13 std::__uniq_ptr_impl >::reset (this=0x7b5800000940, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:182 182 _M_deleter()(__old_p); https://github.com/facebook/rocksdb/issues/14 std::unique_ptr >::reset (this=0x7b5800000940, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:456 456 _M_t.reset(std::move(__p)); https://github.com/facebook/rocksdb/issues/15 rocksdb::VersionSet::~VersionSet (this=this@entry=0x7b5800000900) at db/version_set.cc:5081 5081 column_family_set_.reset(); https://github.com/facebook/rocksdb/issues/16 0x00007fac44e0f97a in rocksdb::VersionSet::~VersionSet (this=0x7b5800000900) at db/version_set.cc:5078 5078 VersionSet::~VersionSet() { https://github.com/facebook/rocksdb/issues/17 0x00007fac44bf0b2f in std::default_delete::operator() (this=0x7b8c00000068, __ptr=0x7b5800000900) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85 85 delete __ptr; https://github.com/facebook/rocksdb/issues/18 std::__uniq_ptr_impl >::reset (this=0x7b8c00000068, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:182 182 _M_deleter()(__old_p); https://github.com/facebook/rocksdb/issues/19 std::unique_ptr >::reset (this=0x7b8c00000068, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:456 456 _M_t.reset(std::move(__p)); https://github.com/facebook/rocksdb/issues/20 rocksdb::DBImpl::CloseHelper (this=this@entry=0x7b8c00000000) at db/db_impl/db_impl.cc:676 676 versions_.reset(); https://github.com/facebook/rocksdb/issues/21 0x00007fac44bf1346 in rocksdb::DBImpl::CloseImpl (this=0x7b8c00000000) at db/db_impl/db_impl.cc:720 720 Status DBImpl::CloseImpl() { return CloseHelper(); } https://github.com/facebook/rocksdb/issues/22 rocksdb::DBImpl::~DBImpl (this=this@entry=0x7b8c00000000) at db/db_impl/db_impl.cc:738 738 closing_status_ = CloseImpl(); https://github.com/facebook/rocksdb/issues/23 0x00007fac44bf2bba in rocksdb::DBImpl::~DBImpl (this=0x7b8c00000000) at db/db_impl/db_impl.cc:722 722 DBImpl::~DBImpl() { https://github.com/facebook/rocksdb/issues/24 0x00007fac455444d4 in rocksdb::DBTestBase::Close (this=this@entry=0x7b6c00000000) at db/db_test_util.cc:678 678 delete db_; https://github.com/facebook/rocksdb/issues/25 0x00007fac455455fb in rocksdb::DBTestBase::TryReopen (this=this@entry=0x7b6c00000000, options=...) at db/db_test_util.cc:707 707 Close(); https://github.com/facebook/rocksdb/issues/26 0x00007fac45543459 in rocksdb::DBTestBase::Reopen (this=0x7ffed74b79a0, options=...) at db/db_test_util.cc:670 670 ASSERT_OK(TryReopen(options)); https://github.com/facebook/rocksdb/issues/27 0x00000000004f2522 in rocksdb::DBErrorHandlingFSTest_AutoRecoverFlushError_Test::TestBody (this=this@entry=0x7b6c00000000) at db/error_handler_fs_test.cc:1224 1224 Reopen(options); ``` Reviewed By: jowlyzhang Differential Revision: D49579701 Pulled By: cbi42 fbshipit-source-id: 3fc8325e6dde7e7faa8bcad95060cb4e26eda638 --- db/db_impl/db_impl.cc | 14 ++++++++------ db/error_handler.cc | 1 + db/error_handler.h | 9 ++++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index ace2899e980..1af0a1fecaa 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -453,7 +453,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { if (shutdown_initiated_) { s = Status::ShutdownInProgress(); } - if (s.ok()) { + if (s.ok() && context.flush_after_recovery) { // Since we drop all non-recovery flush requests during recovery, // and new memtable may fill up during recovery, // schedule one more round of flush. @@ -472,14 +472,16 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // FlushAllColumnFamilies releases and re-acquires mutex. if (shutdown_initiated_) { s = Status::ShutdownInProgress(); - } else { - for (auto cfd : *versions_->GetColumnFamilySet()) { - SchedulePendingCompaction(cfd); - } - MaybeScheduleFlushOrCompaction(); } } + if (s.ok()) { + for (auto cfd : *versions_->GetColumnFamilySet()) { + SchedulePendingCompaction(cfd); + } + MaybeScheduleFlushOrCompaction(); + } + // Wake up any waiters - in this case, it could be the shutdown thread bg_cv_.SignalAll(); diff --git a/db/error_handler.cc b/db/error_handler.cc index d7945550932..ddcfa5401ad 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -679,6 +679,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { return; } DBRecoverContext context = recover_context_; + context.flush_after_recovery = true; int resume_count = db_options_.max_bgerror_resume_count; uint64_t wait_interval = db_options_.bgerror_resume_retry_interval; uint64_t retry_count = 0; diff --git a/db/error_handler.h b/db/error_handler.h index 34e08a525d7..6b1e8028636 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -19,10 +19,13 @@ class DBImpl; // FlushReason, which tells the flush job why this flush is called. struct DBRecoverContext { FlushReason flush_reason; + bool flush_after_recovery; - DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery) {} - - DBRecoverContext(FlushReason reason) : flush_reason(reason) {} + DBRecoverContext() + : flush_reason(FlushReason::kErrorRecovery), + flush_after_recovery(false) {} + DBRecoverContext(FlushReason reason) + : flush_reason(reason), flush_after_recovery(false) {} }; class ErrorHandler {