From 5dad2ea797b1e18f6f03cbdaca3c20deb48e46bd Mon Sep 17 00:00:00 2001 From: Huang-Ming Huang Date: Sat, 5 Dec 2020 16:42:39 -0600 Subject: [PATCH 1/5] sync from block vault during block production --- plugins/producer_plugin/producer_plugin.cpp | 55 +++++++++++++++++++++ tests/blockvault_tests.py | 11 +++++ 2 files changed, 66 insertions(+) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index bb57e6b9429..98c8cf88b2f 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -120,10 +120,28 @@ enum class pending_block_mode { speculating }; +class producer_plugin_impl; +class block_only_sync : public blockvault::sync_callback { + producer_plugin_impl* _impl; + boost::asio::deadline_timer _start_sync_timer; + bool _pending = false; + + public: + block_only_sync(producer_plugin_impl* impl, boost::asio::io_service& io) + : _impl(impl), _start_sync_timer(io) {} + + bool is_pending() const { return _pending; } + void cancel() { _start_sync_timer.cancel(); } + void schedule(); + void on_snapshot(const char* snapshot_filename) override; + void on_block(eosio::chain::signed_block_ptr block) override; +}; + class producer_plugin_impl : public std::enable_shared_from_this { public: producer_plugin_impl(boost::asio::io_service& io) :_timer(io) + ,_block_vault_resync(this, io) ,_transaction_ack_channel(app().get_channel()) { } @@ -148,6 +166,7 @@ class producer_plugin_impl : public std::enable_shared_from_this _signature_providers; std::set _producers; boost::asio::deadline_timer _timer; + block_only_sync _block_vault_resync; using producer_watermark = std::pair; std::map _producer_watermarks; pending_block_mode _pending_block_mode = pending_block_mode::speculating; @@ -353,6 +372,11 @@ class producer_plugin_impl : public std::enable_shared_from_thisproducer ) > 0 ) { + // Cancel any pending resync from blockvault if we received any blocks from the same logical producer + _block_vault_resync.cancel(); + } + const auto& id = block_id ? *block_id : block->calculate_id(); auto blk_num = block->block_num(); @@ -2053,6 +2077,36 @@ static auto maybe_make_debug_time_logger() -> std::optionalchain_plug->chain().last_irreversible_block_id(); + fc_dlog(_log, "Attempt to resync from block vault"); + _impl->blockvault->sync(&id, *this); + } + _pending = false; + })); + } +} + +void block_only_sync::on_snapshot(const char*) { + EOS_THROW(producer_exception, "Attempting to resync from blockvault encountered a snapshot and the node must restart to continue!"); +} + +void block_only_sync::on_block(eosio::chain::signed_block_ptr block) { + try { + _impl->on_sync_block(block, true); + } + catch (unlinkable_block_exception&) { + fc_dlog(_log, "got unlinkable block ${num} from block vault", ("num", block->block_num())); + } +} + void producer_plugin_impl::produce_block() { //ilog("produce_block ${t}", ("t", fc::time_point::now())); // for testing _produce_time_offset_us EOS_ASSERT(_pending_block_mode == pending_block_mode::producing, producer_exception, "called produce_block while not actually producing"); @@ -2098,6 +2152,7 @@ void producer_plugin_impl::produce_block() { pending_blk_state->block, [&p](bool b) { p.set_value(b); }); if (!f.get()) { _latest_rejected_block_num = pending_blk_state->block->block_num(); + _block_vault_resync.schedule(); EOS_ASSERT(false, block_validation_error, "Block rejected by block vault"); } diff --git a/tests/blockvault_tests.py b/tests/blockvault_tests.py index dc11d7d66f3..e2d627d04fd 100755 --- a/tests/blockvault_tests.py +++ b/tests/blockvault_tests.py @@ -150,6 +150,17 @@ def testFailOver(cluster, nodeToKill, addSwapFlags={}): assert node2.waitForLibToAdvance(timeout=60) + Print("#################################################################################") + Print("# Scenario 4: Test one of the two identical producer node fails and the other #") + Print("# can take over. #") + Print("#################################################################################") + Print("Kill node 1") + cluster.biosNode.kill(signal.SIGTERM) + node0.kill(signal.SIGTERM) + node1.kill(signal.SIGTERM) + time.sleep(10) + assert node2.waitForHeadToAdvance(timeout=60) + testSuccessful=True finally: TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, killEosInstances=killEosInstances, killWallet=killWallet, keepLogs=keepLogs, cleanRun=killAll, dumpErrorDetails=dumpErrorDetails) From afd745e4179657da448504d54b3319df75b3ec92 Mon Sep 17 00:00:00 2001 From: Huang-Ming Huang Date: Sat, 5 Dec 2020 21:38:35 -0600 Subject: [PATCH 2/5] Address PR comments --- plugins/producer_plugin/producer_plugin.cpp | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 98c8cf88b2f..19cadfda31b 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -372,11 +372,6 @@ class producer_plugin_impl : public std::enable_shared_from_thisproducer ) > 0 ) { - // Cancel any pending resync from blockvault if we received any blocks from the same logical producer - _block_vault_resync.cancel(); - } - const auto& id = block_id ? *block_id : block->calculate_id(); auto blk_num = block->block_num(); @@ -414,7 +409,12 @@ class producer_plugin_impl : public std::enable_shared_from_thisproducer ) > 0 ) { + // Cancel any pending resync from blockvault if we received any blocks from the same logical producer + _block_vault_resync.cancel(); + } blockvault->async_append_external_block(blk_state->dpos_irreversible_blocknum, blk_state->block, [](bool){}); } } catch ( const guard_exception& e ) { @@ -968,6 +968,7 @@ void producer_plugin::plugin_startup() void producer_plugin::plugin_shutdown() { try { my->_timer.cancel(); + my->_block_vault_resync.cancel(); } catch ( const std::bad_alloc& ) { chain_plugin::handle_bad_alloc(); } catch ( const boost::interprocess::bad_alloc& ) { @@ -2084,10 +2085,17 @@ void block_only_sync::schedule() { _pending = true; _start_sync_timer.async_wait( app().get_priority_queue().wrap(priority::high, [this](const boost::system::error_code& ec) { - if (ec != boost::asio::error::operation_aborted) { + if (!ec) { auto id = _impl->chain_plug->chain().last_irreversible_block_id(); fc_dlog(_log, "Attempt to resync from block vault"); - _impl->blockvault->sync(&id, *this); + try { + _impl->blockvault->sync(&id, *this); + } catch( fc::exception& er ) { + wlog("Attempting to resync from blockvault encountered ${details}; the node must restart to " + "continue!", + ("details", er.to_detail_string())); + app().quit(); + } } _pending = false; })); @@ -2095,12 +2103,12 @@ void block_only_sync::schedule() { } void block_only_sync::on_snapshot(const char*) { - EOS_THROW(producer_exception, "Attempting to resync from blockvault encountered a snapshot and the node must restart to continue!"); + EOS_THROW(producer_exception, "a snapshot"); } void block_only_sync::on_block(eosio::chain::signed_block_ptr block) { try { - _impl->on_sync_block(block, true); + _impl->on_sync_block(block, block->block_num() != _impl->chain_plug->chain().head_block_num() + 1); } catch (unlinkable_block_exception&) { fc_dlog(_log, "got unlinkable block ${num} from block vault", ("num", block->block_num())); From a406b8513355290fa9efca7873dcc36415d10d8d Mon Sep 17 00:00:00 2001 From: Huang-Ming Huang Date: Sun, 6 Dec 2020 07:41:23 -0600 Subject: [PATCH 3/5] fix capturing weak_ptr for time usage --- plugins/producer_plugin/producer_plugin.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 19cadfda31b..7d2b7bd2fdb 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -323,7 +323,7 @@ class producer_plugin_impl : public std::enable_shared_from_thisprevious); if (!previous) { - dlog("Don't have previous block for block number ${bn}, looking for block id ${pbi}", + fc_dlog(_log, "Don't have previous block for block number ${bn}, looking for block id ${pbi}", ("bn", block->block_num())("pbi", block->previous)); return true; } @@ -338,7 +338,7 @@ class producer_plugin_impl : public std::enable_shared_from_thisweak_from_this()](const boost::system::error_code& ec) { + auto shared_impl = weak_impl.lock(); + if (shared_impl.get() && !ec) { auto id = _impl->chain_plug->chain().last_irreversible_block_id(); fc_dlog(_log, "Attempt to resync from block vault"); try { _impl->blockvault->sync(&id, *this); } catch( fc::exception& er ) { - wlog("Attempting to resync from blockvault encountered ${details}; the node must restart to " + fc_wlog(_log, "Attempting to resync from blockvault encountered ${details}; the node must restart to " "continue!", ("details", er.to_detail_string())); app().quit(); From 49db373855ed2ff56b52d2c187252c6ef48ce6fc Mon Sep 17 00:00:00 2001 From: Huang-Ming Huang Date: Sun, 6 Dec 2020 12:26:47 -0600 Subject: [PATCH 4/5] ignore connectivity check now. --- plugins/producer_plugin/producer_plugin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 7d2b7bd2fdb..82adf077510 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -2109,7 +2109,8 @@ void block_only_sync::on_snapshot(const char*) { void block_only_sync::on_block(eosio::chain::signed_block_ptr block) { try { - _impl->on_sync_block(block, block->block_num() != _impl->chain_plug->chain().head_block_num() + 1); + bool connectivity_check = false; // use false right now, should investigate further after 3.0 rc + _impl->on_sync_block(block, connectivity_check); } catch (unlinkable_block_exception&) { fc_dlog(_log, "got unlinkable block ${num} from block vault", ("num", block->block_num())); From 09a0b0ddef6289ccdf50e106f5a0e0847b2d0e06 Mon Sep 17 00:00:00 2001 From: Huang-Ming Huang Date: Sun, 6 Dec 2020 12:57:26 -0600 Subject: [PATCH 5/5] address another PR comment --- plugins/producer_plugin/producer_plugin.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp index 82adf077510..a223b9233f7 100644 --- a/plugins/producer_plugin/producer_plugin.cpp +++ b/plugins/producer_plugin/producer_plugin.cpp @@ -2086,11 +2086,12 @@ void block_only_sync::schedule() { _start_sync_timer.async_wait(app().get_priority_queue().wrap( priority::high, [this, weak_impl = _impl->weak_from_this()](const boost::system::error_code& ec) { auto shared_impl = weak_impl.lock(); - if (shared_impl.get() && !ec) { - auto id = _impl->chain_plug->chain().last_irreversible_block_id(); + auto impl = shared_impl.get(); + if (impl && !ec) { + auto id = impl->chain_plug->chain().last_irreversible_block_id(); fc_dlog(_log, "Attempt to resync from block vault"); try { - _impl->blockvault->sync(&id, *this); + impl->blockvault->sync(&id, *this); } catch( fc::exception& er ) { fc_wlog(_log, "Attempting to resync from blockvault encountered ${details}; the node must restart to " "continue!", @@ -2098,7 +2099,7 @@ void block_only_sync::schedule() { app().quit(); } } - _pending = false; + this->_pending = false; })); } }