From 02c5edc19658b8877a3b42cfc87dcd1c9bd61c4b Mon Sep 17 00:00:00 2001 From: UdjinM6 Date: Tue, 5 Dec 2023 17:09:03 +0300 Subject: [PATCH] fix: Start LLMQContext early to let `VerifyDB()` check ChainLock signatures in coinbase (#5752) ## Issue being fixed or feature implemented Now that we have ChainLock sigs in coinbase, `VerifyDB()` has to process them. It works most of the time because usually we simply read contributions from the quorum db https://github.com/dashpay/dash/blob/develop/src/llmq/quorums.cpp#L385. However, sometimes these contributions aren't available, so we try to re-build them https://github.com/dashpay/dash/blob/develop/src/llmq/quorums.cpp#L388. But by the time we call `VerifyDB()`, the BLS worker threads haven't been started yet, so we keep pushing jobs into the worker's queue but nothing can process them, and everything halts. backtrace: ``` * frame #0: 0x00007fdd85a2873d libc.so.6`syscall at syscall.S:38 frame #1: 0x0000555c41152921 dashd_testnet`std::__atomic_futex_unsigned_base::_M_futex_wait_until(unsigned int*, unsigned int, bool, std::chrono::duration >, std::chrono::duration >) + 225 frame #2: 0x0000555c40e22bd2 dashd_testnet`CBLSWorker::BuildQuorumVerificationVector(Span > > >, bool) at atomic_futex.h:102:36 frame #3: 0x0000555c40d35567 dashd_testnet`llmq::CQuorumManager::BuildQuorumContributions(std::unique_ptr > const&, std::shared_ptr const&) const at quorums.cpp:419:65 frame #4: 0x0000555c40d3b9d1 dashd_testnet`llmq::CQuorumManager::BuildQuorumFromCommitment(Consensus::LLMQType, gsl::not_null) const at quorums.cpp:388:37 frame #5: 0x0000555c40d3c415 dashd_testnet`llmq::CQuorumManager::GetQuorum(Consensus::LLMQType, gsl::not_null) const at quorums.cpp:588:37 frame #6: 0x0000555c40d406a9 dashd_testnet`llmq::CQuorumManager::ScanQuorums(Consensus::LLMQType, CBlockIndex const*, unsigned long) const at quorums.cpp:545:64 frame #7: 0x0000555c40937629 dashd_testnet`llmq::CSigningManager::SelectQuorumForSigning(Consensus::LLMQParams const&, 
llmq::CQuorumManager const&, uint256 const&, int, int) at signing.cpp:1038:90 frame #8: 0x0000555c40937d34 dashd_testnet`llmq::CSigningManager::VerifyRecoveredSig(Consensus::LLMQType, llmq::CQuorumManager const&, int, uint256 const&, uint256 const&, CBLSSignature const&, int) at signing.cpp:1061:113 frame #9: 0x0000555c408e2d43 dashd_testnet`llmq::CChainLocksHandler::VerifyChainLock(llmq::CChainLockSig const&) const at chainlocks.cpp:559:53 frame #10: 0x0000555c40c8b09e dashd_testnet`CheckCbTxBestChainlock(CBlock const&, CBlockIndex const*, llmq::CChainLocksHandler const&, BlockValidationState&) at cbtx.cpp:368:47 frame #11: 0x0000555c40cf75db dashd_testnet`ProcessSpecialTxsInBlock(CBlock const&, CBlockIndex const*, CMNHFManager&, llmq::CQuorumBlockProcessor&, llmq::CChainLocksHandler const&, Consensus::Params const&, CCoinsViewCache const&, bool, bool, BlockValidationState&, std::optional&) at specialtxman.cpp:202:60 frame #12: 0x0000555c40c00a47 dashd_testnet`CChainState::ConnectBlock(CBlock const&, BlockValidationState&, CBlockIndex*, CCoinsViewCache&, bool) at validation.cpp:2179:34 frame #13: 0x0000555c40c0e593 dashd_testnet`CVerifyDB::VerifyDB(CChainState&, CChainParams const&, CCoinsView&, CEvoDB&, int, int) at validation.cpp:4789:41 frame #14: 0x0000555c40851627 dashd_testnet`AppInitMain(std::variant, std::reference_wrapper, std::reference_wrapper, std::reference_wrapper, std::reference_wrapper, std::reference_wrapper > const&, NodeContext&, interfaces::BlockAndHeaderTipInfo*) at init.cpp:2098:50 frame #15: 0x0000555c4082fe11 dashd_testnet`AppInit(int, char**) at bitcoind.cpp:145:54 frame #16: 0x0000555c40823c64 dashd_testnet`main at bitcoind.cpp:173:20 frame #17: 0x00007fdd85934083 libc.so.6`__libc_start_main(main=(dashd_testnet`main at bitcoind.cpp:160:1), argc=3, argv=0x00007ffcb8ca5b88, init=, fini=, rtld_fini=, stack_end=0x00007ffcb8ca5b78) at libc-start.c:308:16 frame #18: 0x0000555c4082f27e dashd_testnet`_start + 46 ``` Fixes #5741 ## What was done? 
Start LLMQContext early. An alternative solution could be to move the BLS worker Start/Stop into the LLMQContext ctor/dtor. ## How Has This Been Tested? I had a node with that issue. This patch fixed it. ## Breaking Changes Not sure, hopefully none. ## Checklist: - [x] I have performed a self-review of my own code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have added or updated relevant unit/integration/functional/e2e tests - [ ] I have made corresponding changes to the documentation - [x] I have assigned this pull request to a milestone _(for repository code-owners and collaborators only)_ --- src/init.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/init.cpp b/src/init.cpp index 9b139e4b5ade1..e59bab8d3074a 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -1934,8 +1934,15 @@ bool AppInitMain(const CoreContext& context, NodeContext& node, interfaces::Bloc creditPoolManager.reset(new CCreditPoolManager(*node.evodb)); llmq::quorumSnapshotManager.reset(); llmq::quorumSnapshotManager.reset(new llmq::CQuorumSnapshotManager(*node.evodb)); + + if (node.llmq_ctx) { + node.llmq_ctx->Interrupt(); + node.llmq_ctx->Stop(); + } node.llmq_ctx.reset(); node.llmq_ctx.reset(new LLMQContext(chainman.ActiveChainstate(), *node.connman, *node.evodb, *::sporkManager, *node.mempool, node.peerman, false, fReset || fReindexChainState)); + // Have to start it early to let VerifyDB check ChainLock signatures in coinbase + node.llmq_ctx->Start(); if (fReset) { pblocktree->WriteReindexing(true); @@ -2307,8 +2314,6 @@ bool AppInitMain(const CoreContext& context, NodeContext& node, interfaces::Bloc node.scheduler->scheduleEvery(std::bind(&PeriodicStats, std::ref(*node.args), std::cref(*node.mempool)), std::chrono::seconds{nStatsPeriod}); } - node.llmq_ctx->Start(); - // ********************************************************* Step 11: import blocks if (!CheckDiskSpace(GetDataDir())) {