From 93fce549c35ad141a5b153b2b568501c36157ea1 Mon Sep 17 00:00:00 2001 From: Alexander Rutkovsky Date: Thu, 9 Jan 2025 10:07:08 +0000 Subject: [PATCH] Fix ForceBlockTabletData behaviour in BlobDepot and DS proxy mock --- ydb/core/base/blobstorage.h | 4 +-- ydb/core/blob_depot/agent/agent_impl.h | 4 +++ ydb/core/blob_depot/agent/query.cpp | 15 +++++++++ .../agent/storage_collect_garbage.cpp | 9 +----- .../blob_depot/agent/storage_discover.cpp | 30 +++++------------- ydb/core/blob_depot/agent/storage_get.cpp | 31 +++++++++++++------ .../blob_depot/agent/storage_get_block.cpp | 24 +++----------- ydb/core/blob_depot/agent/storage_put.cpp | 10 ++---- ydb/core/blobstorage/dsproxy/mock/model.h | 13 ++++++++ .../blobstorage/vdisk/common/vdisk_events.h | 2 +- .../vdisk/skeleton/skeleton_block_and_get.cpp | 19 ++++++------ 11 files changed, 82 insertions(+), 79 deletions(-) diff --git a/ydb/core/base/blobstorage.h b/ydb/core/base/blobstorage.h index 7ce479e7baba..e512380d9517 100644 --- a/ydb/core/base/blobstorage.h +++ b/ydb/core/base/blobstorage.h @@ -1138,8 +1138,8 @@ struct TEvBlobStorage { ui32 Generation = 0; // tablet generation }; - using TForceBlockTabletData = TTabletData; - using TReaderTabletData = TTabletData; + struct TForceBlockTabletData : TTabletData { using TTabletData::TTabletData; }; + struct TReaderTabletData : TTabletData { using TTabletData::TTabletData; }; std::optional ReaderTabletData; std::optional ForceBlockTabletData; diff --git a/ydb/core/blob_depot/agent/agent_impl.h b/ydb/core/blob_depot/agent/agent_impl.h index 91b61de5134c..e4edfdaa237a 100644 --- a/ydb/core/blob_depot/agent/agent_impl.h +++ b/ydb/core/blob_depot/agent/agent_impl.h @@ -356,6 +356,7 @@ namespace NKikimr::NBlobDepot { NLog::EPriority WatchdogPriority = NLog::PRI_WARN; bool Destroyed = false; std::shared_ptr ExecutionRelay; + ui32 BlockChecksRemain = 3; static constexpr TDuration WatchdogDuration = TDuration::Seconds(10); @@ -377,6 +378,9 @@ namespace NKikimr::NBlobDepot { virtual void OnIdAllocated(bool /*success*/) {} virtual void OnDestroy(bool /*success*/) {} + NKikimrProto::EReplyStatus CheckBlockForTablet(ui64 tabletId, std::optional generation, + ui32 *blockedGeneration = nullptr); + protected: // reading logic struct TReadContext; struct TReadArg { diff --git a/ydb/core/blob_depot/agent/query.cpp b/ydb/core/blob_depot/agent/query.cpp index ee98791115cd..271451f932ac 100644 --- a/ydb/core/blob_depot/agent/query.cpp +++ b/ydb/core/blob_depot/agent/query.cpp @@ -1,4 +1,5 @@ #include "agent_impl.h" +#include "blocks.h" namespace NKikimr::NBlobDepot { @@ -209,6 +210,20 @@ namespace NKikimr::NBlobDepot { DoDestroy(); } + NKikimrProto::EReplyStatus TBlobDepotAgent::TQuery::CheckBlockForTablet(ui64 tabletId, std::optional generation, + ui32 *blockedGeneration) { + const NKikimrProto::EReplyStatus status = Agent.BlocksManager.CheckBlockForTablet(tabletId, generation, this, + blockedGeneration); + if (status != NKikimrProto::OK) { + if (status != NKikimrProto::UNKNOWN) { + EndWithError(status, "block race detected"); + } else if (!--BlockChecksRemain) { + EndWithError(NKikimrProto::ERROR, "failed to obtain blocked generation"); + } + } + return status; + } + void TBlobDepotAgent::TQuery::DoDestroy() { Y_ABORT_UNLESS(!Destroyed); Destroyed = true; diff --git a/ydb/core/blob_depot/agent/storage_collect_garbage.cpp b/ydb/core/blob_depot/agent/storage_collect_garbage.cpp index 73e38b6e7a9d..3f7620da3c10 100644 --- a/ydb/core/blob_depot/agent/storage_collect_garbage.cpp +++ b/ydb/core/blob_depot/agent/storage_collect_garbage.cpp @@ -1,12 +1,10 @@ #include "agent_impl.h" -#include "blocks.h" namespace NKikimr::NBlobDepot { template<> TBlobDepotAgent::TQuery *TBlobDepotAgent::CreateQuery(std::unique_ptr ev) { class TCollectGarbageQuery : public TBlobStorageQuery { - ui32 BlockChecksRemain = 3; ui32 KeepIndex = 0; ui32 NumKeep; ui32 DoNotKeepIndex = 0; @@ -21,13 +19,8 @@ namespace NKikimr::NBlobDepot { NumKeep = Request.Keep ? Request.Keep->size() : 0; NumDoNotKeep = Request.DoNotKeep ? Request.DoNotKeep->size() : 0; - const auto status = Agent.BlocksManager.CheckBlockForTablet(Request.TabletId, Request.RecordGeneration, this, nullptr); - if (status == NKikimrProto::OK) { + if (CheckBlockForTablet(Request.TabletId, Request.RecordGeneration) == NKikimrProto::OK) { IssueCollectGarbage(); - } else if (status != NKikimrProto::UNKNOWN) { - EndWithError(status, "block race detected"); - } else if (!--BlockChecksRemain) { - EndWithError(NKikimrProto::ERROR, "failed to acquire blocks"); } } diff --git a/ydb/core/blob_depot/agent/storage_discover.cpp b/ydb/core/blob_depot/agent/storage_discover.cpp index fee3a5db9bc8..5cfec5491b31 100644 --- a/ydb/core/blob_depot/agent/storage_discover.cpp +++ b/ydb/core/blob_depot/agent/storage_discover.cpp @@ -1,5 +1,4 @@ #include "agent_impl.h" -#include "blocks.h" #include "blob_mapping_cache.h" namespace NKikimr::NBlobDepot { @@ -7,8 +6,6 @@ namespace NKikimr::NBlobDepot { template<> TBlobDepotAgent::TQuery *TBlobDepotAgent::CreateQuery(std::unique_ptr ev) { class TDiscoverQuery : public TBlobStorageQuery { - ui32 GetBlockedGenerationRetriesRemain = 10; - bool DoneWithBlockedGeneration = false; bool DoneWithData = false; @@ -27,17 +24,15 @@ namespace NKikimr::NBlobDepot { GenerateInitialResolve(); IssueResolve(); + CheckBlockedGeneration(); + } - if (Request.DiscoverBlockedGeneration) { - const auto status = Agent.BlocksManager.CheckBlockForTablet(Request.TabletId, std::nullopt, this, &BlockedGeneration); - if (status == NKikimrProto::OK) { - DoneWithBlockedGeneration = true; - } else if (status != NKikimrProto::UNKNOWN) { - EndWithError(status, "tablet was deleted"); - } - } else { - DoneWithBlockedGeneration = true; + void CheckBlockedGeneration() { + if (!DoneWithBlockedGeneration) { + DoneWithBlockedGeneration = !Request.DiscoverBlockedGeneration || + CheckBlockForTablet(Request.TabletId, std::nullopt, &BlockedGeneration) == NKikimrProto::OK; } + CheckIfDone(); } void GenerateInitialResolve() { @@ -85,16 +80,7 @@ namespace NKikimr::NBlobDepot { void OnUpdateBlock() override { STLOG(PRI_DEBUG, BLOB_DEPOT_AGENT, BDA18, "OnUpdateBlock", (AgentId, Agent.LogId), (QueryId, GetQueryId())); - - const auto status = Agent.BlocksManager.CheckBlockForTablet(Request.TabletId, std::nullopt, this, &BlockedGeneration); - if (status == NKikimrProto::OK) { - DoneWithBlockedGeneration = true; - CheckIfDone(); - } else if (status != NKikimrProto::UNKNOWN) { - EndWithError(status, "tablet was deleted"); - } else if (!--GetBlockedGenerationRetriesRemain) { - EndWithError(NKikimrProto::ERROR, "too many retries to obtain blocked generation"); - } + CheckBlockedGeneration(); } void HandleResolveResult(ui64 id, TRequestContext::TPtr context, TEvBlobDepot::TEvResolveResult& msg) { diff --git a/ydb/core/blob_depot/agent/storage_get.cpp b/ydb/core/blob_depot/agent/storage_get.cpp index 6350f203ef70..4891c66cd67d 100644 --- a/ydb/core/blob_depot/agent/storage_get.cpp +++ b/ydb/core/blob_depot/agent/storage_get.cpp @@ -1,6 +1,5 @@ #include "agent_impl.h" #include "blob_mapping_cache.h" -#include "blocks.h" namespace NKikimr::NBlobDepot { @@ -22,6 +21,24 @@ namespace NKikimr::NBlobDepot { using TBlobStorageQuery::TBlobStorageQuery; void Initiate() override { + if (const auto& blk = Request.ReaderTabletData) { + if (CheckBlockForTablet(blk->Id, blk->Generation) != NKikimrProto::OK) { + return; + } + } + + if (const auto& blk = Request.ForceBlockTabletData; blk && blk->Generation) { + ui32 blockedGeneration; + if (CheckBlockForTablet(blk->Id, std::nullopt, &blockedGeneration) != NKikimrProto::OK) { + return; + } + if (blockedGeneration != blk->Generation) { + // this can happen only in distributed storage, but not possible in BlobDepot + return EndWithError(NKikimrProto::ERROR, "incorrect blocked generation provided for" + " ForceBlockTabletData in TEvGet query to BlobDepot"); + } + } + if (IS_LOG_PRIORITY_ENABLED(NLog::PRI_TRACE, NKikimrServices::BLOB_DEPOT_EVENTS)) { for (ui32 i = 0; i < Request.QuerySize; ++i) { const auto& q = Request.Queries[i]; @@ -34,14 +51,6 @@ namespace NKikimr::NBlobDepot { TGroupId::FromValue(Agent.VirtualGroupId)); AnswersRemain = Request.QuerySize; - if (Request.ReaderTabletData) { - auto status = Agent.BlocksManager.CheckBlockForTablet(Request.ReaderTabletData->Id, Request.ReaderTabletData->Generation, this, nullptr); - if (status == NKikimrProto::BLOCKED) { - EndWithError(status, "Fail TEvGet due to BLOCKED tablet generation"); - return; - } - } - for (ui32 i = 0; i < Request.QuerySize; ++i) { auto& query = Request.Queries[i]; @@ -65,6 +74,10 @@ namespace NKikimr::NBlobDepot { CheckAndFinish(); } + void OnUpdateBlock() override { + Initiate(); + } + bool ProcessSingleResult(ui32 queryIdx, const TKeyResolved& result) { STLOG(PRI_DEBUG, BLOB_DEPOT_AGENT, BDA27, "ProcessSingleResult", (AgentId, Agent.LogId), (QueryId, GetQueryId()), (QueryIdx, queryIdx), (Result, result)); diff --git a/ydb/core/blob_depot/agent/storage_get_block.cpp b/ydb/core/blob_depot/agent/storage_get_block.cpp index 39fe966f340c..60765ab1f71d 100644 --- a/ydb/core/blob_depot/agent/storage_get_block.cpp +++ b/ydb/core/blob_depot/agent/storage_get_block.cpp @@ -1,5 +1,4 @@ #include "agent_impl.h" -#include "blocks.h" namespace NKikimr::NBlobDepot { @@ -7,34 +6,21 @@ namespace NKikimr::NBlobDepot { TBlobDepotAgent::TQuery *TBlobDepotAgent::CreateQuery(std::unique_ptr ev) { class TGetBlockQuery : public TBlobStorageQuery { ui32 BlockedGeneration = 0; - ui32 RetriesRemain = 10; - - private: - void TryGetBlock() { - const auto status = Agent.BlocksManager.CheckBlockForTablet(Request.TabletId, std::nullopt, this, &BlockedGeneration); - if (status == NKikimrProto::OK) { - EndWithSuccess(std::make_unique(NKikimrProto::OK, Request.TabletId, BlockedGeneration)); - } else if (status != NKikimrProto::UNKNOWN) { - EndWithError(status, "BlobDepot tablet is unreachable"); - } - } public: using TBlobStorageQuery::TBlobStorageQuery; void Initiate() override { - TryGetBlock(); + if (CheckBlockForTablet(Request.TabletId, std::nullopt, &BlockedGeneration) == NKikimrProto::OK) { + EndWithSuccess(std::make_unique(NKikimrProto::OK, + Request.TabletId, BlockedGeneration)); + } } void OnUpdateBlock() override { STLOG(PRI_DEBUG, BLOB_DEPOT_AGENT, BDA52, "OnUpdateBlock", (AgentId, Agent.LogId), (QueryId, GetQueryId())); - - if (!--RetriesRemain) { - EndWithError(NKikimrProto::ERROR, "too many retries to get blocked generation"); - return; - } - TryGetBlock(); + Initiate(); } void ProcessResponse(ui64 /*id*/, TRequestContext::TPtr /*context*/, TResponse /*response*/) override { diff --git a/ydb/core/blob_depot/agent/storage_put.cpp b/ydb/core/blob_depot/agent/storage_put.cpp index cce18b7bfb9d..a1d8aeb6f20e 100644 --- a/ydb/core/blob_depot/agent/storage_put.cpp +++ b/ydb/core/blob_depot/agent/storage_put.cpp @@ -1,5 +1,4 @@ #include "agent_impl.h" -#include "blocks.h" namespace NKikimr::NBlobDepot { @@ -9,7 +8,6 @@ namespace NKikimr::NBlobDepot { const bool SuppressFooter = true; const bool IssueUncertainWrites = false; - std::vector BlockChecksRemain; ui32 PutsInFlight = 0; bool PutsIssued = false; bool WaitingForCommitBlobSeq = false; @@ -44,7 +42,7 @@ namespace NKikimr::NBlobDepot { return EndWithError(NKikimrProto::ERROR, "blob id is zero"); } - BlockChecksRemain.resize(1 + Request.ExtraBlockChecks.size(), 3); // set number of tries for every block + BlockChecksRemain = (1 + Request.ExtraBlockChecks.size()) * 3; // set number of tries for every block CheckBlocks(); } @@ -54,13 +52,9 @@ namespace NKikimr::NBlobDepot { const auto *blkp = i ? &Request.ExtraBlockChecks[i - 1] : nullptr; const ui64 tabletId = blkp ? blkp->first : Request.Id.TabletID(); const ui32 generation = blkp ? blkp->second : Request.Id.Generation(); - const auto status = Agent.BlocksManager.CheckBlockForTablet(tabletId, generation, this, nullptr); + const auto status = CheckBlockForTablet(tabletId, generation); if (status == NKikimrProto::OK) { continue; - } else if (status != NKikimrProto::UNKNOWN) { - return EndWithError(status, "block race detected"); - } else if (!--BlockChecksRemain[i]) { - return EndWithError(NKikimrProto::ERROR, "failed to acquire blocks"); } else { someBlocksMissing = true; } diff --git a/ydb/core/blobstorage/dsproxy/mock/model.h b/ydb/core/blobstorage/dsproxy/mock/model.h index acf5e00adf58..7d969f34d1a4 100644 --- a/ydb/core/blobstorage/dsproxy/mock/model.h +++ b/ydb/core/blobstorage/dsproxy/mock/model.h @@ -106,6 +106,19 @@ namespace NFake { } TEvBlobStorage::TEvGetResult* Handle(TEvBlobStorage::TEvGet *msg) { + if (const auto& blk = msg->ReaderTabletData) { + if (IsBlocked(blk->Id, blk->Generation)) { + auto response = msg->MakeErrorResponse(NKikimrProto::BLOCKED, "block race detected", GroupId); + return response.release(); + } + } + if (const auto& blk = msg->ForceBlockTabletData; blk && blk->Generation) { + auto it = Blocks.find(blk->Id); + Y_VERIFY_S(it != Blocks.end() && it->second == blk->Generation, "incorrect ForceBlockTabletData" + << " expected Generation# " << blk->Generation + << " having Generation# " << (it != Blocks.end() ? ToString(it->second) : "none")); + } + // prepare result structure holding the returned data auto result = std::make_unique(NKikimrProto::OK, msg->QuerySize, GroupId); diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_events.h b/ydb/core/blobstorage/vdisk/common/vdisk_events.h index fc080f6e3203..ed640a644f1b 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_events.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_events.h @@ -1109,7 +1109,7 @@ namespace NKikimr { ShowInternals = 2, }; - using TForceBlockTabletData = TEvBlobStorage::TEvGet::TTabletData; + using TForceBlockTabletData = TEvBlobStorage::TEvGet::TForceBlockTabletData; struct TExtremeQuery : std::tuple { TExtremeQuery(const TLogoBlobID &logoBlobId, ui32 sh, ui32 sz, const ui64 *cookie = nullptr) diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_block_and_get.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_block_and_get.cpp index 23db234c041c..d408be5c3004 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_block_and_get.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_block_and_get.cpp @@ -12,8 +12,6 @@ namespace NKikimr { class TBlockAndGetActor : public TActorBootstrapped { -private: - static constexpr auto VBLOCK_DEFAULT_DEADLINE_SECONDS = 50; public: TBlockAndGetActor() = delete; explicit TBlockAndGetActor( @@ -40,13 +38,15 @@ class TBlockAndGetActor : public TActorBootstrapped { void Bootstrap() { // create TEvVBlock request + const TInstant deadline = Request->Get()->Record.GetMsgQoS().HasDeadlineSeconds() + ? TInstant::Seconds(Request->Get()->Record.GetMsgQoS().GetDeadlineSeconds()) + : TInstant::Max(); + auto request = std::make_unique( Request->Get()->Record.GetForceBlockTabletData().GetId(), Request->Get()->Record.GetForceBlockTabletData().GetGeneration(), VDiskIDFromVDiskID(Request->Get()->Record.GetVDiskID()), - Request->Get()->Record.GetMsgQoS().HasDeadlineSeconds() ? - TInstant::Seconds(Request->Get()->Record.GetMsgQoS().GetDeadlineSeconds()) : - TInstant::Seconds(VBLOCK_DEFAULT_DEADLINE_SECONDS) + deadline ); // send TEvVBlock request @@ -62,11 +62,10 @@ class TBlockAndGetActor : public TActorBootstrapped { void Handle(TEvBlobStorage::TEvVBlockResult::TPtr &ev) { switch (ev->Get()->Record.GetStatus()) { - case NKikimrProto::OK: - break; - case NKikimrProto::ALREADY: - break; - default: { + case NKikimrProto::OK: + case NKikimrProto::ALREADY: + break; + default: { // we failed to block required generation, so return failure auto response = NErrBuilder::ErroneousResult( VCtx,