From 0ef9e876d1335f42598160428c129ad480697157 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sat, 16 Nov 2024 14:18:09 +0800 Subject: [PATCH] DAOS-16477 mgmt: return suspect engines for pool healthy query (#15458) * DAOS-16477 mgmt: return suspect engines for pool healthy query After significant failures, the system may leave behind some suspect engines that were marked as DEAD by the SWIM protocol, but were not excluded from the system to prevent data loss. An administrator can bring these ranks back online by restarting them. This PR aims to provide an administrative interface for querying suspect engines following a massive failure. These suspect engines can be retrieved using the daos/dmg --health-only command. An example of output of dmg pool query --health-only: Pool 6f450a68-8c7d-4da9-8900-02691650f6a2, ntarget=8, disabled=2, leader=3, version=4, state=Degraded Pool health info: - Disabled ranks: 1 - Suspect ranks: 2 - Rebuild busy, 0 objs, 0 recs Features: DmgPoolQueryRanks skip-nlt: true Required-githooks: true Signed-off-by: Wang Shilong Signed-off-by: Phil Henderson Co-authored-by: Phil Henderson --- docs/admin/pool_operations.md | 22 + src/control/cmd/daos/health.go | 19 +- src/control/cmd/daos/pool.go | 67 ++- src/control/cmd/daos/pretty/health.go | 7 + src/control/cmd/daos/pretty/pool.go | 4 + src/control/cmd/daos/pretty/pool_test.go | 39 ++ .../common/proto/ctl/storage_nvme.pb.go | 4 +- src/control/common/proto/ctl/support.pb.go | 2 +- src/control/common/proto/mgmt/pool.pb.go | 198 ++++----- src/control/common/proto/mgmt/svc.pb.go | 2 +- src/control/lib/control/pool_test.go | 101 ++++- src/control/lib/daos/pool.go | 7 +- src/control/lib/daos/pool_test.go | 21 +- src/include/daos_pool.h | 2 + src/include/daos_srv/pool.h | 5 +- src/mgmt/pool.pb-c.c | 19 +- src/mgmt/pool.pb-c.h | 6 +- src/mgmt/srv_drpc.c | 37 +- src/mgmt/srv_internal.h | 4 +- src/mgmt/srv_pool.c | 8 +- src/mgmt/svc.pb-c.c | 380 +++++++++++------- src/mgmt/svc.pb-c.h | 66 +-- 
src/mgmt/tests/mocks.c | 15 +- src/mgmt/tests/mocks.h | 3 +- src/mgmt/tests/srv_drpc_tests.c | 8 +- src/pool/srv_cli.c | 85 +++- src/proto/mgmt/pool.proto | 1 + .../ftest/control/dmg_pool_query_ranks.py | 172 ++++---- .../ftest/control/dmg_pool_query_ranks.yaml | 9 +- src/tests/ftest/util/dmg_utils.py | 6 +- src/tests/ftest/util/dmg_utils_base.py | 1 + src/tests/ftest/util/server_utils_params.py | 6 +- src/tests/ftest/util/test_utils_pool.py | 27 ++ 33 files changed, 932 insertions(+), 421 deletions(-) diff --git a/docs/admin/pool_operations.md b/docs/admin/pool_operations.md index 36907a2e31fa..efc827ae1e78 100644 --- a/docs/admin/pool_operations.md +++ b/docs/admin/pool_operations.md @@ -286,6 +286,28 @@ The example below shows a rebuild in progress and NVMe space allocated. Rebuild busy, 75 objs, 9722 recs ``` +After experiencing significant failures, the pool may retain some suspect +engines that have been marked as DEAD by the SWIM protocol but were not excluded +from the pool to prevent potential data inconsistency. An administrator can bring +these engines back online by restarting them. The example below illustrates the +system’s status with suspect and disabled engines. + +```bash +$ dmg pool query tank -t +``` + +NB: The --health-only/-t option is required to restrict the query to pool health-related information. +This is important because suspect ranks may cause commands to hang and time out, so identifying +and restarting them is a useful procedure. + +```bash +Pool 6f450a68-8c7d-4da9-8900-02691650f6a2, ntarget=8, disabled=2, leader=3, version=4, state=Degraded + Pool health info: + - Disabled ranks: 1 + - Suspect ranks: 2 + - Rebuild busy, 0 objs, 0 recs +``` + Additional status and telemetry data is planned to be exported through management tools and will be documented here once available.
diff --git a/src/control/cmd/daos/health.go b/src/control/cmd/daos/health.go index cbc29c1e3baf..41398d11c921 100644 --- a/src/control/cmd/daos/health.go +++ b/src/control/cmd/daos/health.go @@ -95,24 +95,19 @@ func (cmd *healthCheckCmd) Execute([]string) error { } }() - queryMask := daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines) + queryMask := daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines, + daos.PoolQueryOptionSuspectEngines) + if pool.DisabledTargets > 0 { + queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) + } tpi, err := queryPool(poolHdl, queryMask) if err != nil { cmd.Errorf("failed to query pool %s: %v", pool.Label, err) continue } pool.EnabledRanks = tpi.EnabledRanks - - if pool.DisabledTargets > 0 { - queryMask.ClearAll() - queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) - tpi, err = queryPool(poolHdl, queryMask) - if err != nil { - cmd.Errorf("failed to query pool %s: %v", pool.Label, err) - continue - } - pool.DisabledRanks = tpi.DisabledRanks - } + pool.DisabledRanks = tpi.DisabledRanks + pool.SuspectRanks = tpi.SuspectRanks poolConts, err := listContainers(poolHdl) if err != nil { diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 2aae717766e8..4964a5cf54f7 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -296,11 +296,12 @@ func convertPoolInfo(pinfo *C.daos_pool_info_t) (*daos.PoolInfo, error) { return poolInfo, nil } -func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { +func queryPoolRankLists(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { var rlPtr **C.d_rank_list_t = nil var rl *C.d_rank_list_t = nil - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) || + 
queryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { rlPtr = &rl } @@ -330,6 +331,68 @@ func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.Poo if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { poolInfo.DisabledRanks = rs } + if queryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { + poolInfo.SuspectRanks = rs + } + } + + return poolInfo, nil +} +func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { + poolInfo := &daos.PoolInfo{} + originalMask := queryMask // Save the original queryMask + + // Function to handle the query and return a single RankList + queryAndUpdate := func(option string) error { + // Clear previous options and set new option + queryMask.ClearAll() + queryMask.SetOptions(option) + + poolInfo1, err := queryPoolRankLists(poolHdl, queryMask) + if err != nil { + return err + } + + switch option { + case daos.PoolQueryOptionEnabledEngines: + poolInfo.EnabledRanks = poolInfo1.EnabledRanks + case daos.PoolQueryOptionDisabledEngines: + poolInfo.DisabledRanks = poolInfo1.DisabledRanks + case daos.PoolQueryOptionSuspectEngines: + poolInfo.SuspectRanks = poolInfo1.SuspectRanks + } + return nil + } + + // Preprocess queryMask, select one option for the first query + var firstOption string + if originalMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + firstOption = daos.PoolQueryOptionEnabledEngines + } else if originalMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + firstOption = daos.PoolQueryOptionDisabledEngines + } else if originalMask.HasOption(daos.PoolQueryOptionSuspectEngines) { + firstOption = daos.PoolQueryOptionSuspectEngines + } + + // Perform the first query to get basic information + if err := queryAndUpdate(firstOption); err != nil { + return nil, err + } + + // Check the original query mask and update fields as needed + queryOptions := []string{ + daos.PoolQueryOptionEnabledEngines, + daos.PoolQueryOptionDisabledEngines, + 
daos.PoolQueryOptionSuspectEngines, + } + + // Process each option sequentially + for _, opt := range queryOptions { + if originalMask.HasOption(opt) && opt != firstOption { + if err := queryAndUpdate(opt); err != nil { + return nil, err + } + } } return poolInfo, nil diff --git a/src/control/cmd/daos/pretty/health.go b/src/control/cmd/daos/pretty/health.go index 25c94e10f998..ee77cd723718 100644 --- a/src/control/cmd/daos/pretty/health.go +++ b/src/control/cmd/daos/pretty/health.go @@ -61,6 +61,13 @@ func printPoolHealth(out io.Writer, pi *daos.PoolInfo, verbose bool) { } var healthStrings []string + if pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { + degStr := "Suspect" + if verbose { + degStr += fmt.Sprintf(" %s", pi.SuspectRanks) + } + healthStrings = append(healthStrings, degStr) + } if pi.DisabledTargets > 0 { degStr := "Degraded" if verbose { diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index aa70115152df..631db00ad1a9 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -53,6 +53,10 @@ func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error { if pi.DisabledRanks != nil && pi.DisabledRanks.Count() > 0 { fmt.Fprintf(w, "- Disabled ranks: %s\n", pi.DisabledRanks) } + if pi.QueryMask.HasOption(daos.PoolQueryOptionSuspectEngines) && + pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { + fmt.Fprintf(w, "- Suspect ranks: %s\n", pi.SuspectRanks) + } if pi.Rebuild != nil { if pi.Rebuild.Status == 0 { fmt.Fprintf(w, "- Rebuild %s, %d objs, %d recs\n", diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index 3a1724e1ddae..11f50c0e6be5 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -122,6 +122,45 @@ Pool space info: - Storage tier 1 (NVMe): Total size: 2 B Free: 1 B, min:0 B, max:0 B, mean:0 B +`, poolUUID.String()), + }, + "normal response; suspect ranks": { + pi: 
&daos.PoolInfo{ + QueryMask: daos.HealthOnlyPoolQueryMask, + State: daos.PoolServiceStateDegraded, + UUID: poolUUID, + TotalTargets: 2, + DisabledTargets: 1, + ActiveTargets: 1, + ServiceLeader: 42, + Version: 100, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), + SuspectRanks: ranklist.MustCreateRankSet("[2]"), + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + }, + TierStats: []*daos.StorageUsageStats{ + { + Total: 2, + Free: 1, + }, + { + Total: 2, + Free: 1, + }, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=Degraded +Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. +Pool health info: +- Disabled ranks: 0-1,3 +- Suspect ranks: 2 +- Rebuild busy, 42 objs, 21 recs `, poolUUID.String()), }, "normal response; disabled ranks": { diff --git a/src/control/common/proto/ctl/storage_nvme.pb.go b/src/control/common/proto/ctl/storage_nvme.pb.go index cb2dc5099d45..bbda4c5e5c34 100644 --- a/src/control/common/proto/ctl/storage_nvme.pb.go +++ b/src/control/common/proto/ctl/storage_nvme.pb.go @@ -1,12 +1,12 @@ // -// (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: ctl/storage_nvme.proto diff --git a/src/control/common/proto/ctl/support.pb.go b/src/control/common/proto/ctl/support.pb.go index 7253f2ee183c..7c3905c20245 100644 --- a/src/control/common/proto/ctl/support.pb.go +++ b/src/control/common/proto/ctl/support.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: ctl/support.proto diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index b699d1f55dc2..7c8e36c80089 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: mgmt/pool.proto @@ -1842,6 +1842,7 @@ type PoolQueryResp struct { SvcLdr uint32 `protobuf:"varint,18,opt,name=svc_ldr,json=svcLdr,proto3" json:"svc_ldr,omitempty"` // current raft leader (2.6+) SvcReps []uint32 `protobuf:"varint,19,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // service replica ranks QueryMask uint64 `protobuf:"varint,20,opt,name=query_mask,json=queryMask,proto3" json:"query_mask,omitempty"` // Bitmask of pool query options used + SuspectRanks string `protobuf:"bytes,21,opt,name=suspect_ranks,json=suspectRanks,proto3" json:"suspect_ranks,omitempty"` // optional set of suspect ranks } func (x *PoolQueryResp) Reset() { @@ -2009,6 +2010,13 @@ func (x *PoolQueryResp) GetQueryMask() uint64 { return 0 } +func (x *PoolQueryResp) GetSuspectRanks() string { + if x != nil { + return x.SuspectRanks + } + return "" +} + type PoolProperty struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3032,7 +3040,7 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x04, 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, - 0x02, 0x22, 0xc0, 0x05, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x02, 0x22, 0xe5, 0x05, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 
0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, @@ -3075,103 +3083,105 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x65, 0x70, 0x73, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x14, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, 0x71, 0x75, 0x65, 0x72, 0x79, 0x4d, 0x61, 0x73, - 0x6b, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, - 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, - 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, - 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, - 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, - 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, - 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, - 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, - 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 
0x72, 0x74, 0x69, - 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, - 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, - 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, - 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, - 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, - 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, + 0x6b, 0x12, 0x23, 
0x0a, 0x0d, 0x73, 0x75, 0x73, 0x70, 0x65, 0x63, 0x74, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x75, 0x73, 0x70, 0x65, 0x63, + 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, + 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, + 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, + 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, + 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, + 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, + 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, - 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 
0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, + 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, + 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, + 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, - 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, - 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, - 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, - 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, - 0x67, 
0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, - 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, - 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, - 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, + 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, + 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, + 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 
0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, + 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, + 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, + 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, + 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, + 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, + 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, + 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, + 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 
0x20, 0x01, 0x28, + 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, + 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, + 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, - 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, - 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, - 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, - 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, - 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, - 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, - 0x10, 0x01, 0x12, 
0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, - 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, - 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, - 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, - 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, - 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, - 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, - 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, - 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, - 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, - 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, - 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, - 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 
0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, + 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, + 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, + 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, + 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, + 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, + 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, + 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, + 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, + 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, + 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, + 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 
0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, + 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, + 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, + 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, + 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, + 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, + 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, + 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, + 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, + 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index d0a0039f2822..a21b05bc0b44 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: mgmt/svc.proto diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index aace54a277bb..51304dd05963 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -814,7 +814,7 @@ func TestControl_PoolQueryResp_MarshalJSON(t *testing.T) { }, exp: `{"query_mask":"rebuild,space","state":"Ready","uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, }, - "valid rankset": { + "valid rankset default query": { pqr: &PoolQueryResp{ Status: 42, PoolInfo: daos.PoolInfo{ @@ -836,6 +836,28 @@ func TestControl_PoolQueryResp_MarshalJSON(t *testing.T) { }, exp: `{"query_mask":"rebuild,space","state":"Ready","uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"enabled_ranks":[0,1,2,3,5],"disabled_ranks":[],"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, }, + "valid rankset health query": { + pqr: &PoolQueryResp{ + Status: 42, + PoolInfo: daos.PoolInfo{ + QueryMask: daos.HealthOnlyPoolQueryMask, + State: daos.PoolServiceStateReady, + UUID: poolUUID, + TotalTargets: 1, + ActiveTargets: 2, + TotalEngines: 3, + DisabledTargets: 4, + Version: 5, + ServiceLeader: 6, + ServiceReplicas: []ranklist.Rank{0, 1, 2}, + DisabledRanks: &ranklist.RankSet{}, + SuspectRanks: ranklist.MustCreateRankSet("[7,8,9]"), + PoolLayoutVer: 7, + UpgradeLayoutVer: 8, + }, + }, + exp: `{"query_mask":"disabled_engines,rebuild,suspect_engines","state":"Ready","uuid":"` + poolUUID.String() + 
`","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"disabled_ranks":[],"suspect_ranks":[7,8,9],"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, + }, } { t.Run(name, func(t *testing.T) { got, err := json.Marshal(tc.pqr) @@ -876,7 +898,7 @@ func TestControl_PoolQueryResp_UnmarshalJSON(t *testing.T) { }, }, "valid rankset": { - data: `{"enabled_ranks":"[0,1-3,5]","disabled_ranks":"[]","status":0,"uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":null,"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8}`, + data: `{"enabled_ranks":"[0,1-3,5]","disabled_ranks":"[]","suspect_ranks":"[4]","status":0,"uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":null,"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8}`, expResp: PoolQueryResp{ Status: 0, PoolInfo: daos.PoolInfo{ @@ -889,6 +911,7 @@ func TestControl_PoolQueryResp_UnmarshalJSON(t *testing.T) { ServiceLeader: 6, EnabledRanks: ranklist.MustCreateRankSet("[0-3,5]"), DisabledRanks: &ranklist.RankSet{}, + SuspectRanks: ranklist.MustCreateRankSet("[4]"), PoolLayoutVer: 7, UpgradeLayoutVer: 8, }, @@ -1159,6 +1182,80 @@ func TestControl_PoolQuery(t *testing.T) { }, }, }, + "query succeeds suspect ranks": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 16, + DisabledTargets: 17, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + State: mgmtpb.PoolServiceState_Degraded, + Rebuild: &mgmtpb.PoolRebuildStatus{ + State: mgmtpb.PoolRebuildStatus_BUSY, + Objects: 1, + Records: 2, + }, + TierStats: []*mgmtpb.StorageUsageStats{ + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + 
Mean: 3, + MediaType: mgmtpb.StorageMediaType(daos.StorageMediaTypeScm), + }, + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: mgmtpb.StorageMediaType(daos.StorageMediaTypeNvme), + }, + }, + SuspectRanks: "[1,2,3,7]", + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 16, + DisabledTargets: 17, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + State: daos.PoolServiceStateDegraded, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + Objects: 1, + Records: 2, + }, + TierStats: []*daos.StorageUsageStats{ + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: daos.StorageMediaTypeScm, + }, + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: daos.StorageMediaTypeNvme, + }, + }, + SuspectRanks: ranklist.MustCreateRankSet("[1-3,7]"), + }, + }, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index fe44a00e210a..e96b3c588d12 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -77,6 +77,7 @@ type ( TierStats []*StorageUsageStats `json:"tier_stats"` EnabledRanks *ranklist.RankSet `json:"enabled_ranks,omitempty"` DisabledRanks *ranklist.RankSet `json:"disabled_ranks,omitempty"` + SuspectRanks *ranklist.RankSet `json:"suspect_ranks,omitempty"` PoolLayoutVer uint32 `json:"pool_layout_ver"` UpgradeLayoutVer uint32 `json:"upgrade_layout_ver"` } @@ -104,7 +105,8 @@ type ( const ( // DefaultPoolQueryMask defines the default pool query mask. - DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_ENGINES_DISABLED)) + DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | + C.DPI_ENGINES_DISABLED | C.DPI_ENGINES_SUSPECT)) // HealthOnlyPoolQueryMask defines the mask for health-only queries. 
HealthOnlyPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_SPACE)) @@ -116,6 +118,8 @@ const ( PoolQueryOptionEnabledEngines = "enabled_engines" // PoolQueryOptionDisabledEngines retrieves disabled engines as part of the pool query. PoolQueryOptionDisabledEngines = "disabled_engines" + // PoolQueryOptionSuspectEngines retrieves suspect engines as part of the pool query. + PoolQueryOptionSuspectEngines = "suspect_engines" // PoolConnectFlagReadOnly indicates that the connection is read-only. PoolConnectFlagReadOnly = C.DAOS_PC_RO @@ -130,6 +134,7 @@ var poolQueryOptMap = map[C.int]string{ C.DPI_REBUILD_STATUS: PoolQueryOptionRebuild, C.DPI_ENGINES_ENABLED: PoolQueryOptionEnabledEngines, C.DPI_ENGINES_DISABLED: PoolQueryOptionDisabledEngines, + C.DPI_ENGINES_SUSPECT: PoolQueryOptionSuspectEngines, } func resolvePoolQueryOpt(name string) (C.int, error) { diff --git a/src/control/lib/daos/pool_test.go b/src/control/lib/daos/pool_test.go index e76f33f4c250..cced66b4f509 100644 --- a/src/control/lib/daos/pool_test.go +++ b/src/control/lib/daos/pool_test.go @@ -136,13 +136,14 @@ func TestDaos_PoolQueryMask(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { *pqm = HealthOnlyPoolQueryMask }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSuspectEngines), }, "set query all=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetAll() }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, + PoolQueryOptionRebuild, PoolQueryOptionSpace, PoolQueryOptionSuspectEngines), }, "set query all=false": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -162,7 +163,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() 
pqm.ClearOptions(PoolQueryOptionSpace) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, + PoolQueryOptionRebuild, PoolQueryOptionSuspectEngines), }, "set query space=false (already false)": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -181,7 +183,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionRebuild) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, "set query enabled_engines=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -194,7 +197,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionEnabledEngines) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, "set query disabled_engines=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -207,7 +211,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionDisabledEngines) }), - expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, } { t.Run(name, func(t *testing.T) { @@ -232,7 +237,7 @@ func TestDaos_PoolQueryMaskMarshalJSON(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetAll() }), - expJSON: []byte(`"disabled_engines,enabled_engines,rebuild,space"`), + expJSON: 
[]byte(`"disabled_engines,enabled_engines,rebuild,space,suspect_engines"`), }, } { t.Run(name, func(t *testing.T) { @@ -262,7 +267,7 @@ func TestDaos_PoolQueryMaskUnmarshalJSON(t *testing.T) { }, "uint64 value": { testData: []byte("18446744073709551603"), - expString: "rebuild,space", + expString: "rebuild,space,suspect_engines", }, "string values": { testData: []byte("rebuild,disabled_engines"), diff --git a/src/include/daos_pool.h b/src/include/daos_pool.h index 73f443689131..a8ab2e6c6a2e 100644 --- a/src/include/daos_pool.h +++ b/src/include/daos_pool.h @@ -162,6 +162,8 @@ enum daos_pool_info_bit { DPI_ENGINES_ENABLED = 1ULL << 2, /** true to include (in \a ranks) engines with some or all targets disabled (down). */ DPI_ENGINES_DISABLED = 1ULL << 3, + /** true to include (in \a ranks) suspect engines. */ + DPI_ENGINES_SUSPECT = 1ULL << 4, /** query all above optional info */ DPI_ALL = -1, }; diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index de22d55ed5d6..3403fd049ef6 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -305,8 +305,9 @@ int dsc_pool_svc_delete_acl(uuid_t pool_uuid, d_rank_list_t *ranks, uint64_t dea const char *principal_name); int dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, - d_rank_list_t **ranks, daos_pool_info_t *pool_info, - uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver); + d_rank_list_t **enabled_ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + uint32_t *upgrade_layout_ver); int dsc_pool_svc_query_target(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, d_rank_t rank, uint32_t tgt_idx, daos_target_info_t *ti); diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 3859da6374c3..74817f9ce21c 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -3292,7 +3292,7 @@ const ProtobufCMessageDescriptor mgmt__pool_rebuild_status__descriptor = (ProtobufCMessageInit) 
mgmt__pool_rebuild_status__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[19] = +static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[20] = { { "status", @@ -3522,6 +3522,18 @@ static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[1 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "suspect_ranks", + 21, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, suspect_ranks), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_query_resp__field_indices_by_name[] = { 4, /* field[4] = active_targets */ @@ -3535,6 +3547,7 @@ static const unsigned mgmt__pool_query_resp__field_indices_by_name[] = { 6, /* field[6] = rebuild */ 15, /* field[15] = state */ 0, /* field[0] = status */ + 19, /* field[19] = suspect_ranks */ 16, /* field[16] = svc_ldr */ 17, /* field[17] = svc_reps */ 7, /* field[7] = tier_stats */ @@ -3548,7 +3561,7 @@ static const ProtobufCIntRange mgmt__pool_query_resp__number_ranges[2 + 1] = { { 1, 0 }, { 10, 8 }, - { 0, 19 } + { 0, 20 } }; const ProtobufCMessageDescriptor mgmt__pool_query_resp__descriptor = { @@ -3558,7 +3571,7 @@ const ProtobufCMessageDescriptor mgmt__pool_query_resp__descriptor = "Mgmt__PoolQueryResp", "mgmt", sizeof(Mgmt__PoolQueryResp), - 19, + 20, mgmt__pool_query_resp__field_descriptors, mgmt__pool_query_resp__field_indices_by_name, 2, mgmt__pool_query_resp__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index ad8dee1e9d91..96b4b1c55ac6 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -861,10 +861,14 @@ struct _Mgmt__PoolQueryResp * Bitmask of pool query options used */ uint64_t query_mask; + /* + * optional set of suspect ranks + */ + char *suspect_ranks; }; #define MGMT__POOL_QUERY_RESP__INIT \ { 
PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_query_resp__descriptor) \ - , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, NULL, 0,NULL, 0, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, MGMT__POOL_SERVICE_STATE__Creating, 0, 0,NULL, 0 } + , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, NULL, 0,NULL, 0, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, MGMT__POOL_SERVICE_STATE__Creating, 0, 0,NULL, 0, (char *)protobuf_c_empty_string } typedef enum { diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index de4a951a050c..7a213daa47cb 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -394,7 +394,7 @@ static int pool_create_fill_resp(Mgmt__PoolCreateResp *resp, uuid_t uuid, d_rank D_DEBUG(DB_MGMT, "%d service replicas\n", svc_ranks->rl_nr); - rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, &pool_info, NULL, NULL); + rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, NULL, &pool_info, NULL, NULL); if (rc != 0) { D_ERROR("Failed to query created pool: rc=%d\n", rc); D_GOTO(out, rc); @@ -1744,10 +1744,14 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) uuid_t uuid; daos_pool_info_t pool_info = {0}; d_rank_list_t *svc_ranks; - d_rank_list_t *ranks; - d_rank_range_list_t *range_list; + d_rank_list_t *ranks = NULL; + d_rank_range_list_t *range_list = NULL; + d_rank_range_list_t *range_list1 = NULL; + d_rank_list_t *suspect_ranks = NULL; char *range_list_str = NULL; + char *suspect_ranks_str = NULL; bool truncated; + bool truncated1; size_t len; uint8_t *body; @@ -1778,7 +1782,7 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_GOTO(out, rc = -DER_NOMEM); pool_info.pi_bits = req->query_mask; - rc = ds_mgmt_pool_query(uuid, svc_ranks, &ranks, &pool_info, &resp.pool_layout_ver, + rc = ds_mgmt_pool_query(uuid, svc_ranks, &ranks, &suspect_ranks, &pool_info, &resp.pool_layout_ver, 
&resp.upgrade_layout_ver); if (rc != 0) { D_ERROR("Failed to query the pool, rc=%d\n", rc); @@ -1792,9 +1796,19 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) range_list_str = d_rank_range_list_str(range_list, &truncated); if (range_list_str == NULL) D_GOTO(out_ranges, rc = -DER_NOMEM); - D_DEBUG(DB_MGMT, DF_UUID": %s ranks: %s%s\n", DP_UUID(uuid), + range_list1 = d_rank_range_list_create_from_ranks(suspect_ranks); + if (range_list1 == NULL) + D_GOTO(out_suspect, rc = -DER_NOMEM); + suspect_ranks_str = d_rank_range_list_str(range_list1, &truncated1); + if (suspect_ranks_str == NULL) { + DL_ERROR(rc, DF_UUID ": Failed to serialize the list of suspect ranks", + DP_UUID(uuid)); + D_GOTO(out_suspect, rc = -DER_NOMEM); + } + D_DEBUG(DB_MGMT, DF_UUID": %s ranks: %s%s, suspect_ranks: %s%s\n", DP_UUID(uuid), pool_info.pi_bits & DPI_ENGINES_ENABLED ? "ENABLED" : "DISABLED", range_list_str, - truncated ? " ...(TRUNCATED)" : ""); + truncated ? " ...(TRUNCATED)" : "", suspect_ranks_str, + truncated1 ? "...(TRUNCATED)" : ""); /* Populate the response */ resp.query_mask = pool_info.pi_bits; @@ -1809,11 +1823,12 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) resp.version = pool_info.pi_map_ver; resp.enabled_ranks = (req->query_mask & DPI_ENGINES_ENABLED) ? range_list_str : ""; resp.disabled_ranks = (req->query_mask & DPI_ENGINES_DISABLED) ? 
range_list_str : ""; + if (suspect_ranks_str != NULL) + resp.suspect_ranks = suspect_ranks_str; D_ALLOC_ARRAY(resp.tier_stats, DAOS_MEDIA_MAX); - if (resp.tier_stats == NULL) { - D_GOTO(out_ranges, rc = -DER_NOMEM); - } + if (resp.tier_stats == NULL) + D_GOTO(out_suspect, rc = -DER_NOMEM); storage_usage_stats_from_pool_space(&scm, &pool_info.pi_space, DAOS_MEDIA_SCM); @@ -1828,6 +1843,9 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) pool_rebuild_status_from_info(&rebuild, &pool_info.pi_rebuild_st); resp.rebuild = &rebuild; +out_suspect: + d_rank_range_list_free(range_list1); + d_rank_list_free(suspect_ranks); out_ranges: d_rank_range_list_free(range_list); out_ranks: @@ -1848,6 +1866,7 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) } D_FREE(range_list_str); + D_FREE(suspect_ranks_str); mgmt__pool_query_req__free_unpacked(req, &alloc.alloc); diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 88f7e9010080..e843218922aa 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -114,8 +114,8 @@ int ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, struct daos_pool_cont_info **containers, uint64_t *ncontainers); int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **ranks, - daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver); + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver); int ds_mgmt_pool_query_targets(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_t rank, d_rank_list_t *tgts, daos_target_info_t **infos); diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index c1eb018cdb1b..da6f06cb5bf3 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -406,8 +406,8 @@ ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, */ int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **ranks, - 
daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver) + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) { if (pool_info == NULL) { D_ERROR("pool_info was NULL\n"); @@ -416,8 +416,8 @@ ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **r D_DEBUG(DB_MGMT, "Querying pool "DF_UUID"\n", DP_UUID(pool_uuid)); - return dsc_pool_svc_query(pool_uuid, svc_ranks, mgmt_ps_call_deadline(), ranks, pool_info, - pool_layout_ver, upgrade_layout_ver); + return dsc_pool_svc_query(pool_uuid, svc_ranks, mgmt_ps_call_deadline(), ranks, + suspect_ranks, pool_info, pool_layout_ver, upgrade_layout_ver); } /** diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index 94bb6cf59649..1b7a406c46e3 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -418,43 +418,50 @@ void mgmt__client_net_hint__free_unpacked assert(message->base.descriptor == &mgmt__client_net_hint__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -void -mgmt__build_info__init(Mgmt__BuildInfo *message) +void mgmt__build_info__init + (Mgmt__BuildInfo *message) { static const Mgmt__BuildInfo init_value = MGMT__BUILD_INFO__INIT; - *message = init_value; + *message = init_value; } -size_t -mgmt__build_info__get_packed_size(const Mgmt__BuildInfo *message) +size_t mgmt__build_info__get_packed_size + (const Mgmt__BuildInfo *message) { assert(message->base.descriptor == &mgmt__build_info__descriptor); - return protobuf_c_message_get_packed_size((const ProtobufCMessage *)(message)); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } -size_t -mgmt__build_info__pack(const Mgmt__BuildInfo *message, uint8_t *out) +size_t mgmt__build_info__pack + (const Mgmt__BuildInfo *message, + uint8_t *out) { assert(message->base.descriptor == &mgmt__build_info__descriptor); - return protobuf_c_message_pack((const ProtobufCMessage 
*)message, out); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } -size_t -mgmt__build_info__pack_to_buffer(const Mgmt__BuildInfo *message, ProtobufCBuffer *buffer) +size_t mgmt__build_info__pack_to_buffer + (const Mgmt__BuildInfo *message, + ProtobufCBuffer *buffer) { assert(message->base.descriptor == &mgmt__build_info__descriptor); - return protobuf_c_message_pack_to_buffer((const ProtobufCMessage *)message, buffer); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Mgmt__BuildInfo * -mgmt__build_info__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data) + mgmt__build_info__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) { - return (Mgmt__BuildInfo *)protobuf_c_message_unpack(&mgmt__build_info__descriptor, allocator, len, - data); + return (Mgmt__BuildInfo *) + protobuf_c_message_unpack (&mgmt__build_info__descriptor, + allocator, len, data); } -void -mgmt__build_info__free_unpacked(Mgmt__BuildInfo *message, ProtobufCAllocator *allocator) +void mgmt__build_info__free_unpacked + (Mgmt__BuildInfo *message, + ProtobufCAllocator *allocator) { - if (!message) + if(!message) return; assert(message->base.descriptor == &mgmt__build_info__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void mgmt__get_attach_info_resp__rank_uri__init (Mgmt__GetAttachInfoResp__RankUri *message) @@ -1604,51 +1611,82 @@ const ProtobufCMessageDescriptor mgmt__client_net_hint__descriptor = (ProtobufCMessageInit) mgmt__client_net_hint__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__build_info__field_descriptors[4] = { - { - "major", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, major), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { 
- "minor", 2, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, minor), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "patch", 3, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, patch), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "tag", 4, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, tag), NULL, &protobuf_c_empty_string, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned mgmt__build_info__field_indices_by_name[] = { - 0, /* field[0] = major */ - 1, /* field[1] = minor */ - 2, /* field[2] = patch */ - 3, /* field[3] = tag */ -}; -static const ProtobufCIntRange mgmt__build_info__number_ranges[1 + 1] = {{1, 0}, {0, 4}}; -const ProtobufCMessageDescriptor mgmt__build_info__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.BuildInfo", - "BuildInfo", - "Mgmt__BuildInfo", - "mgmt", - sizeof(Mgmt__BuildInfo), - 4, - mgmt__build_info__field_descriptors, - mgmt__build_info__field_indices_by_name, +static const ProtobufCFieldDescriptor mgmt__build_info__field_descriptors[4] = +{ + { + "major", 1, - mgmt__build_info__number_ranges, - (ProtobufCMessageInit)mgmt__build_info__init, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, major), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "minor", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, minor), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "patch", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, patch), NULL, NULL, - NULL /* reserved[123] */ + 0, /* flags */ + 
0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "tag", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, tag), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__build_info__field_indices_by_name[] = { + 0, /* field[0] = major */ + 1, /* field[1] = minor */ + 2, /* field[2] = patch */ + 3, /* field[3] = tag */ +}; +static const ProtobufCIntRange mgmt__build_info__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 4 } +}; +const ProtobufCMessageDescriptor mgmt__build_info__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.BuildInfo", + "BuildInfo", + "Mgmt__BuildInfo", + "mgmt", + sizeof(Mgmt__BuildInfo), + 4, + mgmt__build_info__field_descriptors, + mgmt__build_info__field_indices_by_name, + 1, mgmt__build_info__number_ranges, + (ProtobufCMessageInit) mgmt__build_info__init, + NULL,NULL,NULL /* reserved[123] */ }; static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__field_descriptors[4] = { @@ -1727,91 +1765,147 @@ const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__rank_uri__descripto (ProtobufCMessageInit) mgmt__get_attach_info_resp__rank_uri__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descriptors[9] = { - { - "status", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, status), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "rank_uris", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(Mgmt__GetAttachInfoResp, n_rank_uris), - offsetof(Mgmt__GetAttachInfoResp, rank_uris), - &mgmt__get_attach_info_resp__rank_uri__descriptor, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "ms_ranks", 3, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_UINT32, - 
offsetof(Mgmt__GetAttachInfoResp, n_ms_ranks), offsetof(Mgmt__GetAttachInfoResp, ms_ranks), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "client_net_hint", 4, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, client_net_hint), &mgmt__client_net_hint__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "data_version", 5, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT64, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, data_version), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "sys", 6, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, sys), NULL, &protobuf_c_empty_string, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "secondary_rank_uris", 7, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(Mgmt__GetAttachInfoResp, n_secondary_rank_uris), - offsetof(Mgmt__GetAttachInfoResp, secondary_rank_uris), - &mgmt__get_attach_info_resp__rank_uri__descriptor, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "secondary_client_net_hints", 8, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(Mgmt__GetAttachInfoResp, n_secondary_client_net_hints), - offsetof(Mgmt__GetAttachInfoResp, secondary_client_net_hints), - &mgmt__client_net_hint__descriptor, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "build_info", 9, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_MESSAGE, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, build_info), &mgmt__build_info__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned mgmt__get_attach_info_resp__field_indices_by_name[] = { - 8, /* field[8] = build_info */ - 3, /* field[3] = client_net_hint */ - 4, /* field[4] = 
data_version */ - 2, /* field[2] = ms_ranks */ - 1, /* field[1] = rank_uris */ - 7, /* field[7] = secondary_client_net_hints */ - 6, /* field[6] = secondary_rank_uris */ - 0, /* field[0] = status */ - 5, /* field[5] = sys */ -}; -static const ProtobufCIntRange mgmt__get_attach_info_resp__number_ranges[1 + 1] = {{1, 0}, {0, 9}}; -const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.GetAttachInfoResp", - "GetAttachInfoResp", - "Mgmt__GetAttachInfoResp", - "mgmt", - sizeof(Mgmt__GetAttachInfoResp), - 9, - mgmt__get_attach_info_resp__field_descriptors, - mgmt__get_attach_info_resp__field_indices_by_name, +static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descriptors[9] = +{ + { + "status", 1, - mgmt__get_attach_info_resp__number_ranges, - (ProtobufCMessageInit)mgmt__get_attach_info_resp__init, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, status), NULL, NULL, - NULL /* reserved[123] */ + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "rank_uris", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_rank_uris), + offsetof(Mgmt__GetAttachInfoResp, rank_uris), + &mgmt__get_attach_info_resp__rank_uri__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ms_ranks", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT32, + offsetof(Mgmt__GetAttachInfoResp, n_ms_ranks), + offsetof(Mgmt__GetAttachInfoResp, ms_ranks), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "client_net_hint", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, client_net_hint), + &mgmt__client_net_hint__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "data_version", + 5, + 
PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, data_version), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "sys", + 6, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "secondary_rank_uris", + 7, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_secondary_rank_uris), + offsetof(Mgmt__GetAttachInfoResp, secondary_rank_uris), + &mgmt__get_attach_info_resp__rank_uri__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "secondary_client_net_hints", + 8, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_secondary_client_net_hints), + offsetof(Mgmt__GetAttachInfoResp, secondary_client_net_hints), + &mgmt__client_net_hint__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "build_info", + 9, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, build_info), + &mgmt__build_info__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__get_attach_info_resp__field_indices_by_name[] = { + 8, /* field[8] = build_info */ + 3, /* field[3] = client_net_hint */ + 4, /* field[4] = data_version */ + 2, /* field[2] = ms_ranks */ + 1, /* field[1] = rank_uris */ + 7, /* field[7] = secondary_client_net_hints */ + 6, /* field[6] = secondary_rank_uris */ + 0, /* field[0] = status */ + 5, /* field[5] = sys */ +}; +static const ProtobufCIntRange mgmt__get_attach_info_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 9 } +}; +const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__descriptor = +{ + 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.GetAttachInfoResp", + "GetAttachInfoResp", + "Mgmt__GetAttachInfoResp", + "mgmt", + sizeof(Mgmt__GetAttachInfoResp), + 9, + mgmt__get_attach_info_resp__field_descriptors, + mgmt__get_attach_info_resp__field_indices_by_name, + 1, mgmt__get_attach_info_resp__number_ranges, + (ProtobufCMessageInit) mgmt__get_attach_info_resp__init, + NULL,NULL,NULL /* reserved[123] */ }; static const ProtobufCFieldDescriptor mgmt__prep_shutdown_req__field_descriptors[1] = { diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 3405ed7c9556..82610110e5a6 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -25,7 +25,7 @@ typedef struct _Mgmt__LeaderQueryReq Mgmt__LeaderQueryReq; typedef struct _Mgmt__LeaderQueryResp Mgmt__LeaderQueryResp; typedef struct _Mgmt__GetAttachInfoReq Mgmt__GetAttachInfoReq; typedef struct _Mgmt__ClientNetHint Mgmt__ClientNetHint; -typedef struct _Mgmt__BuildInfo Mgmt__BuildInfo; +typedef struct _Mgmt__BuildInfo Mgmt__BuildInfo; typedef struct _Mgmt__GetAttachInfoResp Mgmt__GetAttachInfoResp; typedef struct _Mgmt__GetAttachInfoResp__RankUri Mgmt__GetAttachInfoResp__RankUri; typedef struct _Mgmt__PrepShutdownReq Mgmt__PrepShutdownReq; @@ -301,18 +301,19 @@ struct _Mgmt__ClientNetHint { PROTOBUF_C_MESSAGE_INIT (&mgmt__client_net_hint__descriptor) \ , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, 0,NULL, 0 } -struct _Mgmt__BuildInfo { + +struct _Mgmt__BuildInfo +{ ProtobufCMessage base; - uint32_t major; - uint32_t minor; - uint32_t patch; - char *tag; + uint32_t major; + uint32_t minor; + uint32_t patch; + char *tag; }; -#define MGMT__BUILD_INFO__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT(&mgmt__build_info__descriptor) \ - , 0, 0, 0, (char *)protobuf_c_empty_string \ - } +#define MGMT__BUILD_INFO__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__build_info__descriptor) \ + , 0, 0, 0, (char *)protobuf_c_empty_string } + struct 
_Mgmt__GetAttachInfoResp__RankUri { @@ -373,13 +374,12 @@ struct _Mgmt__GetAttachInfoResp /* * Structured server build information */ - Mgmt__BuildInfo *build_info; + Mgmt__BuildInfo *build_info; }; -#define MGMT__GET_ATTACH_INFO_RESP__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT(&mgmt__get_attach_info_resp__descriptor) \ - , 0, 0, NULL, 0, NULL, NULL, 0, (char *)protobuf_c_empty_string, 0, NULL, 0, NULL, NULL \ - } +#define MGMT__GET_ATTACH_INFO_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__get_attach_info_resp__descriptor) \ + , 0, 0,NULL, 0,NULL, NULL, 0, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, NULL } + struct _Mgmt__PrepShutdownReq { @@ -662,18 +662,24 @@ void mgmt__client_net_hint__free_unpacked (Mgmt__ClientNetHint *message, ProtobufCAllocator *allocator); /* Mgmt__BuildInfo methods */ -void -mgmt__build_info__init(Mgmt__BuildInfo *message); -size_t -mgmt__build_info__get_packed_size(const Mgmt__BuildInfo *message); -size_t -mgmt__build_info__pack(const Mgmt__BuildInfo *message, uint8_t *out); -size_t -mgmt__build_info__pack_to_buffer(const Mgmt__BuildInfo *message, ProtobufCBuffer *buffer); +void mgmt__build_info__init + (Mgmt__BuildInfo *message); +size_t mgmt__build_info__get_packed_size + (const Mgmt__BuildInfo *message); +size_t mgmt__build_info__pack + (const Mgmt__BuildInfo *message, + uint8_t *out); +size_t mgmt__build_info__pack_to_buffer + (const Mgmt__BuildInfo *message, + ProtobufCBuffer *buffer); Mgmt__BuildInfo * -mgmt__build_info__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data); -void - mgmt__build_info__free_unpacked(Mgmt__BuildInfo *message, ProtobufCAllocator *allocator); + mgmt__build_info__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__build_info__free_unpacked + (Mgmt__BuildInfo *message, + ProtobufCAllocator *allocator); /* Mgmt__GetAttachInfoResp__RankUri methods */ void mgmt__get_attach_info_resp__rank_uri__init (Mgmt__GetAttachInfoResp__RankUri *message); @@ 
-842,7 +848,9 @@ typedef void (*Mgmt__GetAttachInfoReq_Closure) typedef void (*Mgmt__ClientNetHint_Closure) (const Mgmt__ClientNetHint *message, void *closure_data); -typedef void (*Mgmt__BuildInfo_Closure)(const Mgmt__BuildInfo *message, void *closure_data); +typedef void (*Mgmt__BuildInfo_Closure) + (const Mgmt__BuildInfo *message, + void *closure_data); typedef void (*Mgmt__GetAttachInfoResp__RankUri_Closure) (const Mgmt__GetAttachInfoResp__RankUri *message, void *closure_data); diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index eace651e60b6..42803b0d52bc 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -280,11 +280,12 @@ daos_pool_info_t ds_mgmt_pool_query_info_out; daos_pool_info_t ds_mgmt_pool_query_info_in; void *ds_mgmt_pool_query_info_ptr; d_rank_list_t *ds_mgmt_pool_query_ranks_out; +d_rank_list_t *ds_mgmt_pool_query_suspect_ranks_out; int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **ranks, - daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver) + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) { /* If function is to return with an error, pool_info and ranks will not be filled. 
*/ if (ds_mgmt_pool_query_return != 0) @@ -300,6 +301,13 @@ ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **r *ranks = d_rank_list_alloc(8); /* 0-7 ; caller must free this */ ds_mgmt_pool_query_ranks_out = *ranks; } + if ((pool_info->pi_bits & DPI_ENGINES_SUSPECT) != 0) { + D_ASSERT(suspect_ranks != NULL); + + *suspect_ranks = d_rank_list_alloc(2); /* 0-1 ; caller must free this */ + ds_mgmt_pool_query_suspect_ranks_out = *suspect_ranks; + } + return ds_mgmt_pool_query_return; /* 0 */ } @@ -311,6 +319,7 @@ mock_ds_mgmt_pool_query_setup(void) ds_mgmt_pool_query_info_ptr = NULL; memset(&ds_mgmt_pool_query_info_out, 0, sizeof(daos_pool_info_t)); ds_mgmt_pool_query_ranks_out = NULL; + ds_mgmt_pool_query_suspect_ranks_out = NULL; } int ds_mgmt_pool_query_targets_return; diff --git a/src/mgmt/tests/mocks.h b/src/mgmt/tests/mocks.h index 4f4ddbc9522b..53f6af95e56e 100644 --- a/src/mgmt/tests/mocks.h +++ b/src/mgmt/tests/mocks.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2022 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -109,6 +109,7 @@ extern daos_pool_info_t ds_mgmt_pool_query_info_in; extern daos_pool_info_t ds_mgmt_pool_query_info_out; extern void *ds_mgmt_pool_query_info_ptr; extern d_rank_list_t *ds_mgmt_pool_query_ranks_out; +extern d_rank_list_t *ds_mgmt_pool_query_suspect_ranks_out; void mock_ds_mgmt_pool_query_setup(void); /* diff --git a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index 599eb8db77e0..5d22f34deb2a 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -1429,7 +1429,8 @@ test_drpc_pool_query_success(void **state) init_test_rebuild_status(&exp_info.pi_rebuild_st); ds_mgmt_pool_query_info_out = exp_info; - setup_pool_query_drpc_call(&call, TEST_UUID, DPI_ENGINES_ENABLED); + setup_pool_query_drpc_call(&call, TEST_UUID, + DPI_ENGINES_ENABLED | DPI_ENGINES_SUSPECT); ds_mgmt_drpc_pool_query(&call, &resp); @@ -1439,8 +1440,9 @@ test_drpc_pool_query_success(void **state) assert_int_equal(uuid_compare(exp_uuid, ds_mgmt_pool_query_uuid), 0); assert_non_null(ds_mgmt_pool_query_info_ptr); assert_non_null(ds_mgmt_pool_query_ranks_out); - assert_int_equal(ds_mgmt_pool_query_info_in.pi_bits, - DEFAULT_QUERY_BITS | DPI_ENGINES_ENABLED); + assert_non_null(ds_mgmt_pool_query_suspect_ranks_out); + uint64_t flags = DPI_ENGINES_ENABLED | DPI_ENGINES_SUSPECT; + assert_int_equal(ds_mgmt_pool_query_info_in.pi_bits, flags | DEFAULT_QUERY_BITS); expect_query_resp_with_info(&exp_info, MGMT__POOL_REBUILD_STATUS__STATE__IDLE, diff --git a/src/pool/srv_cli.c b/src/pool/srv_cli.c index 1decb57b3ec5..855900c0c9de 100644 --- a/src/pool/srv_cli.c +++ b/src/pool/srv_cli.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2017-2023 Intel Corporation. + * (C) Copyright 2017-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -336,6 +336,7 @@ dsc_pool_svc_call(uuid_t uuid, d_rank_list_t *ranks, struct dsc_pool_svc_call_cb struct pool_query_arg { d_rank_list_t **pqa_ranks; + d_rank_list_t **pqa_suspect_ranks; daos_pool_info_t *pqa_info; uint32_t *pqa_layout_ver; uint32_t *pqa_upgrade_layout_ver; @@ -367,13 +368,63 @@ pool_query_init(uuid_t pool_uuid, crt_rpc_t *rpc, void *varg) } static int -process_query_result(d_rank_list_t **ranks, daos_pool_info_t *info, uuid_t pool_uuid, +pool_map_get_suspect_ranks(struct pool_map *map, d_rank_list_t **ranks) +{ + crt_group_t *primary_grp; + struct pool_domain *doms; + int doms_cnt; + int i; + int rc = 0; + d_rank_list_t *rank_list = NULL; + + doms_cnt = pool_map_find_ranks(map, PO_COMP_ID_ALL, &doms); + D_ASSERT(doms_cnt >= 0); + primary_grp = crt_group_lookup(NULL); + D_ASSERT(primary_grp != NULL); + + rank_list = d_rank_list_alloc(0); + if (!rank_list) + return -DER_NOMEM; + + for (i = 0; i < doms_cnt; i++) { + struct swim_member_state state; + + if (!(doms[i].do_comp.co_status & PO_COMP_ST_UPIN)) + continue; + + rc = crt_rank_state_get(primary_grp, doms[i].do_comp.co_rank, &state); + if (rc != 0 && rc != -DER_NONEXIST) { + D_ERROR("failed to get status of rank %u: %d\n", doms[i].do_comp.co_rank, + rc); + break; + } + + D_DEBUG(DB_MD, "rank/state %d/%d\n", doms[i].do_comp.co_rank, + rc == -DER_NONEXIST ? 
-1 : state.sms_status); + if (rc == -DER_NONEXIST || state.sms_status == SWIM_MEMBER_DEAD) { + rc = d_rank_list_append(rank_list, doms[i].do_comp.co_rank); + if (rc) + D_GOTO(err, rc); + } + } +err: + if (rc == 0) + *ranks = rank_list; + else + d_rank_list_free(rank_list); + return rc; +} + +static int +process_query_result(d_rank_list_t **ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *info, uuid_t pool_uuid, uint32_t map_version, uint32_t leader_rank, struct daos_pool_space *ps, struct daos_rebuild_status *rs, struct pool_buf *map_buf) { struct pool_map *map; int rc; unsigned int num_disabled = 0; + d_rank_list_t *suspect_rank_list = NULL; rc = pool_map_create(map_buf, map_version, &map); if (rc != 0) { @@ -402,10 +453,30 @@ process_query_result(d_rank_list_t **ranks, daos_pool_info_t *info, uuid_t pool_ D_DEBUG(DB_MD, DF_UUID": found %u %s ranks in pool map\n", DP_UUID(pool_uuid), (*ranks)->rl_nr, get_enabled ? "ENABLED" : "DISABLED"); } + if (info && (info->pi_bits & DPI_ENGINES_SUSPECT) != 0) { + if (suspect_ranks == NULL) { + DL_ERROR(-DER_INVAL, + DF_UUID ": query pool requested suspect ranks, but ptr is NULL", + DP_UUID(pool_uuid)); + D_GOTO(out, rc = -DER_INVAL); + } + + rc = pool_map_get_suspect_ranks(map, &suspect_rank_list); + if (rc != 0) { + DL_ERROR(rc, DF_UUID ": pool_map_get_ranks() failed", DP_UUID(pool_uuid)); + D_GOTO(out, rc); + } + D_DEBUG(DB_MD, DF_UUID ": found %" PRIu32 " suspect ranks in pool map\n", + DP_UUID(pool_uuid), suspect_rank_list->rl_nr); + } pool_query_reply_to_info(pool_uuid, map_buf, map_version, leader_rank, ps, rs, info); out: + if (rc == 0 && suspect_rank_list != NULL) + *suspect_ranks = suspect_rank_list; + else if (rc) + d_rank_list_free(suspect_rank_list); pool_map_decref(map); return rc; } @@ -432,8 +503,8 @@ pool_query_consume(uuid_t pool_uuid, crt_rpc_t *rpc, void *varg) D_DEBUG(DB_MGMT, DF_UUID": Successfully queried pool\n", DP_UUID(pool_uuid)); - rc = process_query_result(arg->pqa_ranks, arg->pqa_info, 
pool_uuid, - out->pqo_op.po_map_version, out->pqo_op.po_hint.sh_rank, + rc = process_query_result(arg->pqa_ranks, arg->pqa_suspect_ranks, arg->pqa_info, + pool_uuid, out->pqo_op.po_map_version, out->pqo_op.po_hint.sh_rank, &out->pqo_space, &out->pqo_rebuild_st, arg->pqa_map_buf); if (arg->pqa_layout_ver) *arg->pqa_layout_ver = out->pqo_pool_layout_ver; @@ -474,6 +545,8 @@ static struct dsc_pool_svc_call_cbs pool_query_cbs = { * targets according to #pi_bits (DPI_ENGINES_ENABLED bit). * Note: ranks may be empty (i.e., *ranks->rl_nr may be 0). * The caller must free the list with d_rank_list_free(). + * \param[out] suspect_ranks Optional, suspect ranks marked as DEAD by the SWIM + * protocol, but not excluded from the system. * \param[out] pool_info Results of the pool query * \param[out] pool_layout_ver Results of the current pool global version * \param[out] pool_upgrade_layout_ver Results of the target latest pool global version @@ -484,11 +557,13 @@ static struct dsc_pool_svc_call_cbs pool_query_cbs = { */ int dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, - d_rank_list_t **ranks, daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + d_rank_list_t **ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) { struct pool_query_arg arg = { .pqa_ranks = ranks, + .pqa_suspect_ranks = suspect_ranks, .pqa_info = pool_info, .pqa_layout_ver = pool_layout_ver, .pqa_upgrade_layout_ver = upgrade_layout_ver, diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index d54ceca42802..8b6361f594d8 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -238,6 +238,7 @@ message PoolQueryResp { uint32 svc_ldr = 18; // current raft leader (2.6+) repeated uint32 svc_reps = 19; // service replica ranks uint64 query_mask = 20; // Bitmask of pool query options used + string suspect_ranks = 21; // optional set of suspect ranks } message 
PoolProperty { diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.py b/src/tests/ftest/control/dmg_pool_query_ranks.py index cb23d2c5af13..f9f1da775a90 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.py +++ b/src/tests/ftest/control/dmg_pool_query_ranks.py @@ -38,38 +38,24 @@ def test_pool_query_ranks_basic(self): """ self.log.info("Basic tests of pool query with ranks state") - self.log.debug("Checking without ranks state information") + self.log_step("Checking pool query without ranks state information") data = self.dmg.pool_query(self.pool.identifier) - self.assertIsNone( - data['response'].get('enabled_ranks'), - "Invalid enabled_ranks field: want=None, got={}".format( - data['response'].get('enabled_ranks'))) - self.assertIsNone( - data['response'].get('disabled_ranks'), - "Invalid disabled_ranks field: want=None, got={}".format( - data['response'].get('disabled_ranks'))) - - self.log.debug("Checking enabled ranks state information") + + self._verify_ranks(None, data, "enabled_ranks") + self._verify_ranks(None, data, "disabled_ranks") + + self.log_step("Checking pool query with enabled ranks state information") data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), [0, 1, 2], - "Invalid enabled_ranks field: want=[0, 1, 2], got={}".format( - data['response'].get('enabled_ranks'))) - self.assertIsNone( - data['response'].get('disabled_ranks'), - "Invalid disabled_ranks field: want=None, got={}".format( - data['response'].get('disabled_ranks'))) - - self.log.debug("Checking disabled ranks state information") + self._verify_ranks([0, 1, 2, 3, 4], data, "enabled_ranks") + self._verify_ranks(None, data, "disabled_ranks") + + self.log_step("Checking pool query with suspect ranks state information") + data = self.dmg.pool_query(self.pool.identifier, health_only=True) + self._verify_ranks([], data, "suspect_ranks") + data = self.dmg.pool_query(self.pool.identifier, 
show_disabled=True) - self.assertIsNone( - data['response'].get('enabled_ranks'), - "Invalid enabled_ranks field: want=None, got={}".format( - data['response'].get('enabled_ranks'))) - self.assertListEqual( - data['response'].get('disabled_ranks'), [], - "Invalid disabled_ranks field: want=[], got={}".format( - data['response'].get('disabled_ranks'))) + self._verify_ranks(None, data, "enabled_ranks") + self._verify_ranks([], data, "disabled_ranks") def test_pool_query_ranks_error(self): """Test that ranks state option are mutually exclusive. @@ -97,9 +83,10 @@ def test_pool_query_ranks_mgmt(self): """Test the state of ranks after excluding and reintegrate them. Test Description: - Create a pool with some engines exclude them one by one and check the consistency of the - list of enabled and disabled ranks. Then, reintegrate them and check the consistency of - the list of enabled and disabled ranks. + Create a pool with 5 engines, first excluded engine marked as "Disabled" + second stopped one as “Suspect,” restarting it, ensuring rebuild completes, + clearing the “Suspect” status, reintegrating the excluded first engine, and + finally verifying that all engines are enabled with the excluded rank now empty. 
:avocado: tags=all,daily_regression :avocado: tags=vm @@ -113,64 +100,73 @@ def test_pool_query_ranks_mgmt(self): all_ranks = enabled_ranks.copy() self.random.shuffle(all_ranks) - self.log.info("Starting excluding ranks: all_ranks=%s", all_ranks) - for rank in all_ranks: - self.log.debug("Excluding rank %d", rank) - self.pool.exclude([rank]) - enabled_ranks.remove(rank) - disabled_ranks = sorted(disabled_ranks + [rank]) - - self.log.debug("Checking enabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), enabled_ranks, - "Invalid enabled_ranks field: want={}, got={}".format( - enabled_ranks, data['response'].get('enabled_ranks'))) + exclude_rank = all_ranks[0] + suspect_rank = all_ranks[1] + self.log_step(f"Excluding pool rank:{exclude_rank} all_ranks={all_ranks}") + self.pool.exclude([exclude_rank]) + enabled_ranks.remove(exclude_rank) + disabled_ranks = sorted(disabled_ranks + [exclude_rank]) + + self.log_step("Checking enabled ranks state information") + data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) + self._verify_ranks(enabled_ranks, data, "enabled_ranks") - self.log.debug("Checking disabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_disabled=True) - self.assertListEqual( - data['response'].get('disabled_ranks'), disabled_ranks, - "Invalid disabled_ranks field: want={}, got={}".format( - disabled_ranks, data['response'].get('disabled_ranks'))) + self.log_step(f"Waiting for rebuild to start after excluding pool rank {exclude_rank}") + self.pool.wait_for_rebuild_to_start() - self.log.debug("Waiting for pool to be rebuild") - self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end() + # kill second rank. 
+ self.log_step(f"Stopping rank:{suspect_rank} all_ranks={all_ranks}") + self.server_managers[0].stop_ranks([suspect_rank], self.d_log) - self.random.shuffle(all_ranks) - self.log.info("Starting reintegrating ranks: all_ranks=%s", all_ranks) - for rank in all_ranks: - self.log.debug("Reintegrating rank %d", rank) - - cmd_succeed = False - for _ in range(3): - try: - result = self.pool.reintegrate(rank) - cmd_succeed = True - break - except CommandFailure: - self.log.debug("dmg command failed retry") - time.sleep(3) - - self.assertTrue(cmd_succeed, "pool reintegrate failed: {}".format(result)) - enabled_ranks = sorted(enabled_ranks + [rank]) - disabled_ranks.remove(rank) - - self.log.debug("Checking enabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), enabled_ranks, - "Invalid enabled_ranks field: want={}, got={}".format( - enabled_ranks, data['response'].get('enabled_ranks'))) + self.log_step(f"Waiting for pool rank {suspect_rank} to be suspected") + self.pool.wait_pool_suspect_ranks([suspect_rank], timeout=30) + data = self.dmg.pool_query(self.pool.identifier, show_disabled=True) + self._verify_ranks([disabled_ranks], data, "disabled_ranks") - self.log.debug("Checking disabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_disabled=True) - self.assertListEqual( - data['response'].get('disabled_ranks'), disabled_ranks, - "Invalid disabled_ranks field: want={}, got={}".format( - disabled_ranks, data['response'].get('disabled_ranks'))) + self.log_step(f"Starting rank {suspect_rank}") + self.server_managers[0].start_ranks([suspect_rank], self.d_log) - self.log.debug("Waiting for pool to be rebuild") - self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end() + self.log_step("Waiting for pool ranks to no longer be suspected") + self.pool.wait_pool_suspect_ranks([], timeout=30) + + self.log_step("Waiting 
for rebuild to complete") + self.pool.wait_for_rebuild_to_end() + + self.log_step(f"Reintegrating rank {exclude_rank}") + cmd_succeed = False + for _ in range(3): + try: + self.pool.reintegrate(exclude_rank) + cmd_succeed = True + break + except CommandFailure: + self.log.debug("dmg command failed retry") + time.sleep(3) + + self.assertTrue(cmd_succeed, "pool reintegrate failed") + self.log_step(f"Waiting for rebuild to complete after reintegrating rank {exclude_rank}") + self.pool.wait_for_rebuild_to_start() + self.pool.wait_for_rebuild_to_end() + + enabled_ranks = sorted(enabled_ranks + [exclude_rank]) + disabled_ranks.remove(exclude_rank) + + self.log_step("Checking enabled ranks state information") + data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) + self._verify_ranks([enabled_ranks], data, "enabled_ranks") + + def _verify_ranks(self, expect, data, key): + """Verify the expected and actual rank lists are equal. + + Args: + expect (list): list of ranks to expect + data (dict): dmg json response containing actual list of ranks + key (str): the dmg json response key used to access the actual list of ranks + """ + actual = data["response"].get(key) + if expect is None: + self.assertIsNone(actual, f"Invalid {key} field: want=None, got={actual}") + else: + self.assertListEqual( + actual, expect, f"Invalid {key} field: want={expect}, got={actual}") + self.log.debug("Check of %s passed: %s == %s", key, expect, actual) diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.yaml b/src/tests/ftest/control/dmg_pool_query_ranks.yaml index 78edac3bd2ce..6e4f7eae7ea7 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.yaml +++ b/src/tests/ftest/control/dmg_pool_query_ranks.yaml @@ -1,16 +1,21 @@ hosts: - test_servers: 3 + test_servers: 5 timeouts: test_pool_query_ranks_basic: 120 test_pool_query_ranks_error: 120 test_pool_query_ranks_mgmt: 480 server_config: name: daos_server + crt_timeout: 5 engines_per_host: 1 engines: 0: targets: 4 
nr_xs_helpers: 0 + env_vars: + - SWIM_SUSPECT_TIMEOUT=10000 + - DAOS_POOL_RF=1 + - DD_MASK=io,epc,rebuild storage: 0: class: ram @@ -19,3 +24,5 @@ server_config: pool: control_method: dmg size: 4GB + svcn: 5 + properties: rd_fac:1 diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index effc3172bac9..a34f5d74b48f 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -625,13 +625,14 @@ def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None, return data - def pool_query(self, pool, show_enabled=False, show_disabled=False): + def pool_query(self, pool, show_enabled=False, show_disabled=False, health_only=False): """Query a pool with the dmg command. Args: pool (str): Pool UUID or label to query. show_enabled (bool, optional): Display enabled ranks. show_disabled (bool, optional): Display disabled ranks. + health_only (bool, optional): Only perform pool health related queries. Raises: CommandFailure: if the dmg pool query command fails. @@ -678,7 +679,8 @@ def pool_query(self, pool, show_enabled=False, show_disabled=False): # "status": 0 # } return self._get_json_result(("pool", "query"), pool=pool, - show_enabled=show_enabled, show_disabled=show_disabled) + show_enabled=show_enabled, show_disabled=show_disabled, + health_only=health_only) def pool_query_targets(self, pool, rank=None, target_idx=None): """Call dmg pool query-targets. 
diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index 39109320af11..8691f8736314 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -534,6 +534,7 @@ def __init__(self): self.pool = BasicParameter(None, position=1) self.show_enabled = FormattedParameter("--show-enabled", False) self.show_disabled = FormattedParameter("--show-disabled", False) + self.health_only = FormattedParameter("--health-only", False) class QueryTargetsSubCommand(CommandWithParameters): """Defines an object for the dmg pool query-targets command.""" diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 7cda958d2423..46db48912200 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -539,9 +539,11 @@ def get_params(self, test): # Update the env vars with any missing or different required setting update = False - env_var_dict = {env.split("=")[0]: env.split("=")[1] for env in self.env_vars.value} + env_var_dict = { + env.split("=", maxsplit=1)[0]: env.split("=", maxsplit=1)[1] + for env in self.env_vars.value} for key in sorted(required_env_vars): - if key not in env_var_dict or env_var_dict[key] != required_env_vars[key]: + if key not in env_var_dict: env_var_dict[key] = required_env_vars[key] update = True if update: diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index fbb6484e2925..7ca6912396f6 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -1450,6 +1450,33 @@ def check_pool_files(self, hosts, uuid, scm_mount): status = False return status + def wait_pool_suspect_ranks(self, expected, interval=1, timeout=30): + """Wait for the pool suspect ranks. + + Args: + expected (list): suspect ranks check to wait. 
+ interval (int, optional): number of seconds to wait in between pool query checks + timeout (int, optional): seconds to wait for the suspect ranks to match the + expected values before failing the test. + + Raises: + DaosTestError: if the suspect ranks do not match before the timeout expires. + + """ + self.log.info("waiting for pool ranks %s to be suspected", expected) + + start = time() + data = self.dmg.pool_query(self.identifier, health_only=True) + while data['response'].get('suspect_ranks') != expected: + self.log.info(" suspect ranks is %s ...", data['response'].get('suspect_ranks')) + if time() - start > timeout: + raise DaosTestError("TIMEOUT detected after {} seconds while for waiting " + "for ranks {} suspect".format(timeout, expected)) + sleep(interval) + data = self.dmg.pool_query(self.identifier, health_only=True) + + self.log.info("Wait for suspect ranks complete: suspect ranks %s", expected) + def verify_uuid_directory(self, host, scm_mount): """Check if pool folder exist on server.