diff --git a/docs/admin/pool_operations.md b/docs/admin/pool_operations.md index 36907a2e31fa..efc827ae1e78 100644 --- a/docs/admin/pool_operations.md +++ b/docs/admin/pool_operations.md @@ -286,6 +286,28 @@ The example below shows a rebuild in progress and NVMe space allocated. Rebuild busy, 75 objs, 9722 recs ``` +After experiencing significant failures, the pool may retain some suspect +engines that have been marked as DEAD by the SWIM protocol but were not excluded +from the pool, in order to prevent potential data inconsistency. An administrator can bring +these engines back online by restarting them. The example below illustrates the +system’s status with suspect and disabled engines. + +```bash +$ dmg pool query tank -t +``` + +NB: The --health-only/-t option restricts the query to pool health-related information only. +This is important because suspect ranks may cause commands to hang and time out, so identifying +and restarting them is a useful procedure. + +```bash +Pool 6f450a68-8c7d-4da9-8900-02691650f6a2, ntarget=8, disabled=2, leader=3, version=4, state=Degraded + Pool health info: + - Disabled ranks: 1 + - Suspect ranks: 2 + - Rebuild busy, 0 objs, 0 recs +``` + Additional status and telemetry data is planned to be exported through management tools and will be documented here once available. 
diff --git a/src/control/cmd/daos/health.go b/src/control/cmd/daos/health.go index cbc29c1e3baf..41398d11c921 100644 --- a/src/control/cmd/daos/health.go +++ b/src/control/cmd/daos/health.go @@ -95,24 +95,19 @@ func (cmd *healthCheckCmd) Execute([]string) error { } }() - queryMask := daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines) + queryMask := daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines, + daos.PoolQueryOptionSuspectEngines) + if pool.DisabledTargets > 0 { + queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) + } tpi, err := queryPool(poolHdl, queryMask) if err != nil { cmd.Errorf("failed to query pool %s: %v", pool.Label, err) continue } pool.EnabledRanks = tpi.EnabledRanks - - if pool.DisabledTargets > 0 { - queryMask.ClearAll() - queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) - tpi, err = queryPool(poolHdl, queryMask) - if err != nil { - cmd.Errorf("failed to query pool %s: %v", pool.Label, err) - continue - } - pool.DisabledRanks = tpi.DisabledRanks - } + pool.DisabledRanks = tpi.DisabledRanks + pool.SuspectRanks = tpi.SuspectRanks poolConts, err := listContainers(poolHdl) if err != nil { diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 2aae717766e8..4964a5cf54f7 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -296,11 +296,12 @@ func convertPoolInfo(pinfo *C.daos_pool_info_t) (*daos.PoolInfo, error) { return poolInfo, nil } -func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { +func queryPoolRankLists(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { var rlPtr **C.d_rank_list_t = nil var rl *C.d_rank_list_t = nil - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) || queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) || + 
queryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { rlPtr = &rl } @@ -330,6 +331,68 @@ func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.Poo if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { poolInfo.DisabledRanks = rs } + if queryMask.HasOption(daos.PoolQueryOptionSuspectEngines) { + poolInfo.SuspectRanks = rs + } + } + + return poolInfo, nil +} +func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { + poolInfo := &daos.PoolInfo{} + originalMask := queryMask // Save the original queryMask + + // Function to handle the query and return a single RankList + queryAndUpdate := func(option string) error { + // Clear previous options and set new option + queryMask.ClearAll() + queryMask.SetOptions(option) + + poolInfo1, err := queryPoolRankLists(poolHdl, queryMask) + if err != nil { + return err + } + + switch option { + case daos.PoolQueryOptionEnabledEngines: + poolInfo.EnabledRanks = poolInfo1.EnabledRanks + case daos.PoolQueryOptionDisabledEngines: + poolInfo.DisabledRanks = poolInfo1.DisabledRanks + case daos.PoolQueryOptionSuspectEngines: + poolInfo.SuspectRanks = poolInfo1.SuspectRanks + } + return nil + } + + // Preprocess queryMask, select one option for the first query + var firstOption string + if originalMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + firstOption = daos.PoolQueryOptionEnabledEngines + } else if originalMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + firstOption = daos.PoolQueryOptionDisabledEngines + } else if originalMask.HasOption(daos.PoolQueryOptionSuspectEngines) { + firstOption = daos.PoolQueryOptionSuspectEngines + } + + // Perform the first query to get basic information + if err := queryAndUpdate(firstOption); err != nil { + return nil, err + } + + // Check the original query mask and update fields as needed + queryOptions := []string{ + daos.PoolQueryOptionEnabledEngines, + daos.PoolQueryOptionDisabledEngines, + 
daos.PoolQueryOptionSuspectEngines, + } + + // Process each option sequentially + for _, opt := range queryOptions { + if originalMask.HasOption(opt) && opt != firstOption { + if err := queryAndUpdate(opt); err != nil { + return nil, err + } + } } return poolInfo, nil diff --git a/src/control/cmd/daos/pretty/health.go b/src/control/cmd/daos/pretty/health.go index 25c94e10f998..ee77cd723718 100644 --- a/src/control/cmd/daos/pretty/health.go +++ b/src/control/cmd/daos/pretty/health.go @@ -61,6 +61,13 @@ func printPoolHealth(out io.Writer, pi *daos.PoolInfo, verbose bool) { } var healthStrings []string + if pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { + degStr := "Suspect" + if verbose { + degStr += fmt.Sprintf(" %s", pi.SuspectRanks) + } + healthStrings = append(healthStrings, degStr) + } if pi.DisabledTargets > 0 { degStr := "Degraded" if verbose { diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index aa70115152df..631db00ad1a9 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -53,6 +53,10 @@ func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error { if pi.DisabledRanks != nil && pi.DisabledRanks.Count() > 0 { fmt.Fprintf(w, "- Disabled ranks: %s\n", pi.DisabledRanks) } + if pi.QueryMask.HasOption(daos.PoolQueryOptionSuspectEngines) && + pi.SuspectRanks != nil && pi.SuspectRanks.Count() > 0 { + fmt.Fprintf(w, "- Suspect ranks: %s\n", pi.SuspectRanks) + } if pi.Rebuild != nil { if pi.Rebuild.Status == 0 { fmt.Fprintf(w, "- Rebuild %s, %d objs, %d recs\n", diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index 3a1724e1ddae..11f50c0e6be5 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -122,6 +122,45 @@ Pool space info: - Storage tier 1 (NVMe): Total size: 2 B Free: 1 B, min:0 B, max:0 B, mean:0 B +`, poolUUID.String()), + }, + "normal response; suspect ranks": { + pi: 
&daos.PoolInfo{ + QueryMask: daos.HealthOnlyPoolQueryMask, + State: daos.PoolServiceStateDegraded, + UUID: poolUUID, + TotalTargets: 2, + DisabledTargets: 1, + ActiveTargets: 1, + ServiceLeader: 42, + Version: 100, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), + SuspectRanks: ranklist.MustCreateRankSet("[2]"), + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + }, + TierStats: []*daos.StorageUsageStats{ + { + Total: 2, + Free: 1, + }, + { + Total: 2, + Free: 1, + }, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=Degraded +Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. +Pool health info: +- Disabled ranks: 0-1,3 +- Suspect ranks: 2 +- Rebuild busy, 42 objs, 21 recs `, poolUUID.String()), }, "normal response; disabled ranks": { diff --git a/src/control/common/proto/ctl/storage_nvme.pb.go b/src/control/common/proto/ctl/storage_nvme.pb.go index cb2dc5099d45..bbda4c5e5c34 100644 --- a/src/control/common/proto/ctl/storage_nvme.pb.go +++ b/src/control/common/proto/ctl/storage_nvme.pb.go @@ -1,12 +1,12 @@ // -// (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2019-2024 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: ctl/storage_nvme.proto diff --git a/src/control/common/proto/ctl/support.pb.go b/src/control/common/proto/ctl/support.pb.go index 7253f2ee183c..7c3905c20245 100644 --- a/src/control/common/proto/ctl/support.pb.go +++ b/src/control/common/proto/ctl/support.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.28.1 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: ctl/support.proto diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index b699d1f55dc2..7c8e36c80089 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: mgmt/pool.proto @@ -1842,6 +1842,7 @@ type PoolQueryResp struct { SvcLdr uint32 `protobuf:"varint,18,opt,name=svc_ldr,json=svcLdr,proto3" json:"svc_ldr,omitempty"` // current raft leader (2.6+) SvcReps []uint32 `protobuf:"varint,19,rep,packed,name=svc_reps,json=svcReps,proto3" json:"svc_reps,omitempty"` // service replica ranks QueryMask uint64 `protobuf:"varint,20,opt,name=query_mask,json=queryMask,proto3" json:"query_mask,omitempty"` // Bitmask of pool query options used + SuspectRanks string `protobuf:"bytes,21,opt,name=suspect_ranks,json=suspectRanks,proto3" json:"suspect_ranks,omitempty"` // optional set of suspect ranks } func (x *PoolQueryResp) Reset() { @@ -2009,6 +2010,13 @@ func (x *PoolQueryResp) GetQueryMask() uint64 { return 0 } +func (x *PoolQueryResp) GetSuspectRanks() string { + if x != nil { + return x.SuspectRanks + } + return "" +} + type PoolProperty struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3032,7 +3040,7 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x04, 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, - 0x02, 0x22, 0xc0, 0x05, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, + 0x02, 0x22, 0xe5, 0x05, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 
0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, @@ -3075,103 +3083,105 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x65, 0x70, 0x73, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x14, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, 0x71, 0x75, 0x65, 0x72, 0x79, 0x4d, 0x61, 0x73, - 0x6b, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, - 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, - 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, - 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, - 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, - 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, - 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, - 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, - 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 
0x72, 0x74, 0x69, - 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, - 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, - 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, - 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, - 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, - 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, + 0x6b, 0x12, 0x23, 
0x0a, 0x0d, 0x73, 0x75, 0x73, 0x70, 0x65, 0x63, 0x74, 0x5f, 0x72, 0x61, 0x6e, + 0x6b, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x75, 0x73, 0x70, 0x65, 0x63, + 0x74, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, + 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, + 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, + 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, + 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, + 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, + 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, - 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 
0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, + 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, + 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, + 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, - 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, - 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, - 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, - 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, - 0x67, 
0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, - 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, - 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x38, - 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x6d, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, + 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, + 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, + 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, + 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 
0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, + 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, + 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, + 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, + 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, + 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, + 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xda, 0x02, 0x0a, 0x13, + 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, + 0x6e, 0x66, 0x6f, 0x12, 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 
0x20, 0x01, 0x28, + 0x0e, 0x32, 0x24, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, + 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, + 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, - 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, - 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, - 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, - 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, - 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, - 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, - 0x10, 0x01, 0x12, 
0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, - 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, - 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, - 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, - 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, - 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x56, 0x0a, 0x10, 0x50, 0x6f, - 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, - 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, - 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, - 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x65, 0x67, 0x72, 0x61, - 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, - 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, - 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 
0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, + 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, + 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, + 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, + 0x0a, 0x03, 0x53, 0x53, 0x44, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, + 0x06, 0x0a, 0x02, 0x56, 0x4d, 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, + 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, + 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, + 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, + 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, + 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, + 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, + 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 
0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, + 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, + 0x56, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, + 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, + 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, + 0x44, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, + 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, + 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, + 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, + 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/common/proto/mgmt/svc.pb.go b/src/control/common/proto/mgmt/svc.pb.go index d0a0039f2822..a21b05bc0b44 100644 --- a/src/control/common/proto/mgmt/svc.pb.go +++ b/src/control/common/proto/mgmt/svc.pb.go @@ -6,7 +6,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: -// protoc-gen-go v1.31.0 +// protoc-gen-go v1.33.0 // protoc v3.5.0 // source: mgmt/svc.proto diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index aace54a277bb..51304dd05963 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -814,7 +814,7 @@ func TestControl_PoolQueryResp_MarshalJSON(t *testing.T) { }, exp: `{"query_mask":"rebuild,space","state":"Ready","uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, }, - "valid rankset": { + "valid rankset default query": { pqr: &PoolQueryResp{ Status: 42, PoolInfo: daos.PoolInfo{ @@ -836,6 +836,28 @@ func TestControl_PoolQueryResp_MarshalJSON(t *testing.T) { }, exp: `{"query_mask":"rebuild,space","state":"Ready","uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"enabled_ranks":[0,1,2,3,5],"disabled_ranks":[],"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, }, + "valid rankset health query": { + pqr: &PoolQueryResp{ + Status: 42, + PoolInfo: daos.PoolInfo{ + QueryMask: daos.HealthOnlyPoolQueryMask, + State: daos.PoolServiceStateReady, + UUID: poolUUID, + TotalTargets: 1, + ActiveTargets: 2, + TotalEngines: 3, + DisabledTargets: 4, + Version: 5, + ServiceLeader: 6, + ServiceReplicas: []ranklist.Rank{0, 1, 2}, + DisabledRanks: &ranklist.RankSet{}, + SuspectRanks: ranklist.MustCreateRankSet("[7,8,9]"), + PoolLayoutVer: 7, + UpgradeLayoutVer: 8, + }, + }, + exp: `{"query_mask":"disabled_engines,rebuild,suspect_engines","state":"Ready","uuid":"` + poolUUID.String() + 
`","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":[0,1,2],"rebuild":null,"tier_stats":null,"disabled_ranks":[],"suspect_ranks":[7,8,9],"pool_layout_ver":7,"upgrade_layout_ver":8,"status":42}`, + }, } { t.Run(name, func(t *testing.T) { got, err := json.Marshal(tc.pqr) @@ -876,7 +898,7 @@ func TestControl_PoolQueryResp_UnmarshalJSON(t *testing.T) { }, }, "valid rankset": { - data: `{"enabled_ranks":"[0,1-3,5]","disabled_ranks":"[]","status":0,"uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":null,"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8}`, + data: `{"enabled_ranks":"[0,1-3,5]","disabled_ranks":"[]","suspect_ranks":"[4]","status":0,"uuid":"` + poolUUID.String() + `","total_targets":1,"active_targets":2,"total_engines":3,"disabled_targets":4,"version":5,"svc_ldr":6,"svc_reps":null,"rebuild":null,"tier_stats":null,"pool_layout_ver":7,"upgrade_layout_ver":8}`, expResp: PoolQueryResp{ Status: 0, PoolInfo: daos.PoolInfo{ @@ -889,6 +911,7 @@ func TestControl_PoolQueryResp_UnmarshalJSON(t *testing.T) { ServiceLeader: 6, EnabledRanks: ranklist.MustCreateRankSet("[0-3,5]"), DisabledRanks: &ranklist.RankSet{}, + SuspectRanks: ranklist.MustCreateRankSet("[4]"), PoolLayoutVer: 7, UpgradeLayoutVer: 8, }, @@ -1159,6 +1182,80 @@ func TestControl_PoolQuery(t *testing.T) { }, }, }, + "query succeeds suspect ranks": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 16, + DisabledTargets: 17, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + State: mgmtpb.PoolServiceState_Degraded, + Rebuild: &mgmtpb.PoolRebuildStatus{ + State: mgmtpb.PoolRebuildStatus_BUSY, + Objects: 1, + Records: 2, + }, + TierStats: []*mgmtpb.StorageUsageStats{ + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + 
Mean: 3, + MediaType: mgmtpb.StorageMediaType(daos.StorageMediaTypeScm), + }, + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: mgmtpb.StorageMediaType(daos.StorageMediaTypeNvme), + }, + }, + SuspectRanks: "[1,2,3,7]", + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 16, + DisabledTargets: 17, + PoolLayoutVer: 1, + UpgradeLayoutVer: 2, + State: daos.PoolServiceStateDegraded, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + Objects: 1, + Records: 2, + }, + TierStats: []*daos.StorageUsageStats{ + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: daos.StorageMediaTypeScm, + }, + { + Total: 123456, + Free: 0, + Min: 1, + Max: 2, + Mean: 3, + MediaType: daos.StorageMediaTypeNvme, + }, + }, + SuspectRanks: ranklist.MustCreateRankSet("[1-3,7]"), + }, + }, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index fe44a00e210a..e96b3c588d12 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -77,6 +77,7 @@ type ( TierStats []*StorageUsageStats `json:"tier_stats"` EnabledRanks *ranklist.RankSet `json:"enabled_ranks,omitempty"` DisabledRanks *ranklist.RankSet `json:"disabled_ranks,omitempty"` + SuspectRanks *ranklist.RankSet `json:"suspect_ranks,omitempty"` PoolLayoutVer uint32 `json:"pool_layout_ver"` UpgradeLayoutVer uint32 `json:"upgrade_layout_ver"` } @@ -104,7 +105,8 @@ type ( const ( // DefaultPoolQueryMask defines the default pool query mask. - DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_ENGINES_DISABLED)) + DefaultPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | + C.DPI_ENGINES_DISABLED | C.DPI_ENGINES_SUSPECT)) // HealthOnlyPoolQueryMask defines the mask for health-only queries. 
HealthOnlyPoolQueryMask = PoolQueryMask(^uint64(0) &^ (C.DPI_ENGINES_ENABLED | C.DPI_SPACE)) @@ -116,6 +118,8 @@ const ( PoolQueryOptionEnabledEngines = "enabled_engines" // PoolQueryOptionDisabledEngines retrieves disabled engines as part of the pool query. PoolQueryOptionDisabledEngines = "disabled_engines" + // PoolQueryOptionSuspectEngines retrieves suspect engines as part of the pool query. + PoolQueryOptionSuspectEngines = "suspect_engines" // PoolConnectFlagReadOnly indicates that the connection is read-only. PoolConnectFlagReadOnly = C.DAOS_PC_RO @@ -130,6 +134,7 @@ var poolQueryOptMap = map[C.int]string{ C.DPI_REBUILD_STATUS: PoolQueryOptionRebuild, C.DPI_ENGINES_ENABLED: PoolQueryOptionEnabledEngines, C.DPI_ENGINES_DISABLED: PoolQueryOptionDisabledEngines, + C.DPI_ENGINES_SUSPECT: PoolQueryOptionSuspectEngines, } func resolvePoolQueryOpt(name string) (C.int, error) { diff --git a/src/control/lib/daos/pool_test.go b/src/control/lib/daos/pool_test.go index e76f33f4c250..cced66b4f509 100644 --- a/src/control/lib/daos/pool_test.go +++ b/src/control/lib/daos/pool_test.go @@ -136,13 +136,14 @@ func TestDaos_PoolQueryMask(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { *pqm = HealthOnlyPoolQueryMask }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSuspectEngines), }, "set query all=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetAll() }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, + PoolQueryOptionRebuild, PoolQueryOptionSpace, PoolQueryOptionSuspectEngines), }, "set query all=false": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -162,7 +163,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() 
pqm.ClearOptions(PoolQueryOptionSpace) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, + PoolQueryOptionRebuild, PoolQueryOptionSuspectEngines), }, "set query space=false (already false)": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -181,7 +183,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionRebuild) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionEnabledEngines, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, "set query enabled_engines=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -194,7 +197,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionEnabledEngines) }), - expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionDisabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, "set query disabled_engines=true": { testMask: genTestMask(func(pqm *PoolQueryMask) { @@ -207,7 +211,8 @@ func TestDaos_PoolQueryMask(t *testing.T) { pqm.SetAll() pqm.ClearOptions(PoolQueryOptionDisabledEngines) }), - expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace), + expString: genOptsStr(PoolQueryOptionEnabledEngines, PoolQueryOptionRebuild, PoolQueryOptionSpace, + PoolQueryOptionSuspectEngines), }, } { t.Run(name, func(t *testing.T) { @@ -232,7 +237,7 @@ func TestDaos_PoolQueryMaskMarshalJSON(t *testing.T) { testMask: genTestMask(func(pqm *PoolQueryMask) { pqm.SetAll() }), - expJSON: []byte(`"disabled_engines,enabled_engines,rebuild,space"`), + expJSON: 
[]byte(`"disabled_engines,enabled_engines,rebuild,space,suspect_engines"`), }, } { t.Run(name, func(t *testing.T) { @@ -262,7 +267,7 @@ func TestDaos_PoolQueryMaskUnmarshalJSON(t *testing.T) { }, "uint64 value": { testData: []byte("18446744073709551603"), - expString: "rebuild,space", + expString: "rebuild,space,suspect_engines", }, "string values": { testData: []byte("rebuild,disabled_engines"), diff --git a/src/include/daos_pool.h b/src/include/daos_pool.h index 73f443689131..a8ab2e6c6a2e 100644 --- a/src/include/daos_pool.h +++ b/src/include/daos_pool.h @@ -162,6 +162,8 @@ enum daos_pool_info_bit { DPI_ENGINES_ENABLED = 1ULL << 2, /** true to include (in \a ranks) engines with some or all targets disabled (down). */ DPI_ENGINES_DISABLED = 1ULL << 3, + /** true to include (in \a ranks) suspect engines. */ + DPI_ENGINES_SUSPECT = 1ULL << 4, /** query all above optional info */ DPI_ALL = -1, }; diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index de22d55ed5d6..3403fd049ef6 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -305,8 +305,9 @@ int dsc_pool_svc_delete_acl(uuid_t pool_uuid, d_rank_list_t *ranks, uint64_t dea const char *principal_name); int dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, - d_rank_list_t **ranks, daos_pool_info_t *pool_info, - uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver); + d_rank_list_t **enabled_ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + uint32_t *upgrade_layout_ver); int dsc_pool_svc_query_target(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, d_rank_t rank, uint32_t tgt_idx, daos_target_info_t *ti); diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 3859da6374c3..74817f9ce21c 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -3292,7 +3292,7 @@ const ProtobufCMessageDescriptor mgmt__pool_rebuild_status__descriptor = (ProtobufCMessageInit) 
mgmt__pool_rebuild_status__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[19] = +static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[20] = { { "status", @@ -3522,6 +3522,18 @@ static const ProtobufCFieldDescriptor mgmt__pool_query_resp__field_descriptors[1 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "suspect_ranks", + 21, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolQueryResp, suspect_ranks), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_query_resp__field_indices_by_name[] = { 4, /* field[4] = active_targets */ @@ -3535,6 +3547,7 @@ static const unsigned mgmt__pool_query_resp__field_indices_by_name[] = { 6, /* field[6] = rebuild */ 15, /* field[15] = state */ 0, /* field[0] = status */ + 19, /* field[19] = suspect_ranks */ 16, /* field[16] = svc_ldr */ 17, /* field[17] = svc_reps */ 7, /* field[7] = tier_stats */ @@ -3548,7 +3561,7 @@ static const ProtobufCIntRange mgmt__pool_query_resp__number_ranges[2 + 1] = { { 1, 0 }, { 10, 8 }, - { 0, 19 } + { 0, 20 } }; const ProtobufCMessageDescriptor mgmt__pool_query_resp__descriptor = { @@ -3558,7 +3571,7 @@ const ProtobufCMessageDescriptor mgmt__pool_query_resp__descriptor = "Mgmt__PoolQueryResp", "mgmt", sizeof(Mgmt__PoolQueryResp), - 19, + 20, mgmt__pool_query_resp__field_descriptors, mgmt__pool_query_resp__field_indices_by_name, 2, mgmt__pool_query_resp__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index ad8dee1e9d91..96b4b1c55ac6 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -861,10 +861,14 @@ struct _Mgmt__PoolQueryResp * Bitmask of pool query options used */ uint64_t query_mask; + /* + * optional set of suspect ranks + */ + char *suspect_ranks; }; #define MGMT__POOL_QUERY_RESP__INIT \ { 
PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_query_resp__descriptor) \ - , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, NULL, 0,NULL, 0, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, MGMT__POOL_SERVICE_STATE__Creating, 0, 0,NULL, 0 } + , 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, NULL, 0,NULL, 0, 0, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, MGMT__POOL_SERVICE_STATE__Creating, 0, 0,NULL, 0, (char *)protobuf_c_empty_string } typedef enum { diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index de4a951a050c..7a213daa47cb 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -394,7 +394,7 @@ static int pool_create_fill_resp(Mgmt__PoolCreateResp *resp, uuid_t uuid, d_rank D_DEBUG(DB_MGMT, "%d service replicas\n", svc_ranks->rl_nr); - rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, &pool_info, NULL, NULL); + rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, NULL, &pool_info, NULL, NULL); if (rc != 0) { D_ERROR("Failed to query created pool: rc=%d\n", rc); D_GOTO(out, rc); @@ -1744,10 +1744,14 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) uuid_t uuid; daos_pool_info_t pool_info = {0}; d_rank_list_t *svc_ranks; - d_rank_list_t *ranks; - d_rank_range_list_t *range_list; + d_rank_list_t *ranks = NULL; + d_rank_range_list_t *range_list = NULL; + d_rank_range_list_t *range_list1 = NULL; + d_rank_list_t *suspect_ranks = NULL; char *range_list_str = NULL; + char *suspect_ranks_str = NULL; bool truncated; + bool truncated1; size_t len; uint8_t *body; @@ -1778,7 +1782,7 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_GOTO(out, rc = -DER_NOMEM); pool_info.pi_bits = req->query_mask; - rc = ds_mgmt_pool_query(uuid, svc_ranks, &ranks, &pool_info, &resp.pool_layout_ver, + rc = ds_mgmt_pool_query(uuid, svc_ranks, &ranks, &suspect_ranks, &pool_info, &resp.pool_layout_ver, 
&resp.upgrade_layout_ver); if (rc != 0) { D_ERROR("Failed to query the pool, rc=%d\n", rc); @@ -1792,9 +1796,19 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) range_list_str = d_rank_range_list_str(range_list, &truncated); if (range_list_str == NULL) D_GOTO(out_ranges, rc = -DER_NOMEM); - D_DEBUG(DB_MGMT, DF_UUID": %s ranks: %s%s\n", DP_UUID(uuid), + range_list1 = d_rank_range_list_create_from_ranks(suspect_ranks); + if (range_list1 == NULL) + D_GOTO(out_suspect, rc = -DER_NOMEM); + suspect_ranks_str = d_rank_range_list_str(range_list1, &truncated1); + if (suspect_ranks_str == NULL) { + DL_ERROR(rc, DF_UUID ": Failed to serialize the list of suspect ranks", + DP_UUID(uuid)); + D_GOTO(out_suspect, rc = -DER_NOMEM); + } + D_DEBUG(DB_MGMT, DF_UUID": %s ranks: %s%s, suspect_ranks: %s%s\n", DP_UUID(uuid), pool_info.pi_bits & DPI_ENGINES_ENABLED ? "ENABLED" : "DISABLED", range_list_str, - truncated ? " ...(TRUNCATED)" : ""); + truncated ? " ...(TRUNCATED)" : "", suspect_ranks_str, + truncated1 ? "...(TRUNCATED)" : ""); /* Populate the response */ resp.query_mask = pool_info.pi_bits; @@ -1809,11 +1823,12 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) resp.version = pool_info.pi_map_ver; resp.enabled_ranks = (req->query_mask & DPI_ENGINES_ENABLED) ? range_list_str : ""; resp.disabled_ranks = (req->query_mask & DPI_ENGINES_DISABLED) ? 
range_list_str : ""; + if (suspect_ranks_str != NULL) + resp.suspect_ranks = suspect_ranks_str; D_ALLOC_ARRAY(resp.tier_stats, DAOS_MEDIA_MAX); - if (resp.tier_stats == NULL) { - D_GOTO(out_ranges, rc = -DER_NOMEM); - } + if (resp.tier_stats == NULL) + D_GOTO(out_suspect, rc = -DER_NOMEM); storage_usage_stats_from_pool_space(&scm, &pool_info.pi_space, DAOS_MEDIA_SCM); @@ -1828,6 +1843,9 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) pool_rebuild_status_from_info(&rebuild, &pool_info.pi_rebuild_st); resp.rebuild = &rebuild; +out_suspect: + d_rank_range_list_free(range_list1); + d_rank_list_free(suspect_ranks); out_ranges: d_rank_range_list_free(range_list); out_ranks: @@ -1848,6 +1866,7 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) } D_FREE(range_list_str); + D_FREE(suspect_ranks_str); mgmt__pool_query_req__free_unpacked(req, &alloc.alloc); diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 88f7e9010080..e843218922aa 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -114,8 +114,8 @@ int ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, struct daos_pool_cont_info **containers, uint64_t *ncontainers); int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **ranks, - daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver); + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver); int ds_mgmt_pool_query_targets(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_t rank, d_rank_list_t *tgts, daos_target_info_t **infos); diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index c1eb018cdb1b..da6f06cb5bf3 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -406,8 +406,8 @@ ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, */ int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **ranks, - 
daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver) + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) { if (pool_info == NULL) { D_ERROR("pool_info was NULL\n"); @@ -416,8 +416,8 @@ ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **r D_DEBUG(DB_MGMT, "Querying pool "DF_UUID"\n", DP_UUID(pool_uuid)); - return dsc_pool_svc_query(pool_uuid, svc_ranks, mgmt_ps_call_deadline(), ranks, pool_info, - pool_layout_ver, upgrade_layout_ver); + return dsc_pool_svc_query(pool_uuid, svc_ranks, mgmt_ps_call_deadline(), ranks, + suspect_ranks, pool_info, pool_layout_ver, upgrade_layout_ver); } /** diff --git a/src/mgmt/svc.pb-c.c b/src/mgmt/svc.pb-c.c index 94bb6cf59649..1b7a406c46e3 100644 --- a/src/mgmt/svc.pb-c.c +++ b/src/mgmt/svc.pb-c.c @@ -418,43 +418,50 @@ void mgmt__client_net_hint__free_unpacked assert(message->base.descriptor == &mgmt__client_net_hint__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -void -mgmt__build_info__init(Mgmt__BuildInfo *message) +void mgmt__build_info__init + (Mgmt__BuildInfo *message) { static const Mgmt__BuildInfo init_value = MGMT__BUILD_INFO__INIT; - *message = init_value; + *message = init_value; } -size_t -mgmt__build_info__get_packed_size(const Mgmt__BuildInfo *message) +size_t mgmt__build_info__get_packed_size + (const Mgmt__BuildInfo *message) { assert(message->base.descriptor == &mgmt__build_info__descriptor); - return protobuf_c_message_get_packed_size((const ProtobufCMessage *)(message)); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } -size_t -mgmt__build_info__pack(const Mgmt__BuildInfo *message, uint8_t *out) +size_t mgmt__build_info__pack + (const Mgmt__BuildInfo *message, + uint8_t *out) { assert(message->base.descriptor == &mgmt__build_info__descriptor); - return protobuf_c_message_pack((const ProtobufCMessage 
*)message, out); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } -size_t -mgmt__build_info__pack_to_buffer(const Mgmt__BuildInfo *message, ProtobufCBuffer *buffer) +size_t mgmt__build_info__pack_to_buffer + (const Mgmt__BuildInfo *message, + ProtobufCBuffer *buffer) { assert(message->base.descriptor == &mgmt__build_info__descriptor); - return protobuf_c_message_pack_to_buffer((const ProtobufCMessage *)message, buffer); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Mgmt__BuildInfo * -mgmt__build_info__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data) + mgmt__build_info__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) { - return (Mgmt__BuildInfo *)protobuf_c_message_unpack(&mgmt__build_info__descriptor, allocator, len, - data); + return (Mgmt__BuildInfo *) + protobuf_c_message_unpack (&mgmt__build_info__descriptor, + allocator, len, data); } -void -mgmt__build_info__free_unpacked(Mgmt__BuildInfo *message, ProtobufCAllocator *allocator) +void mgmt__build_info__free_unpacked + (Mgmt__BuildInfo *message, + ProtobufCAllocator *allocator) { - if (!message) + if(!message) return; assert(message->base.descriptor == &mgmt__build_info__descriptor); - protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void mgmt__get_attach_info_resp__rank_uri__init (Mgmt__GetAttachInfoResp__RankUri *message) @@ -1604,51 +1611,82 @@ const ProtobufCMessageDescriptor mgmt__client_net_hint__descriptor = (ProtobufCMessageInit) mgmt__client_net_hint__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__build_info__field_descriptors[4] = { - { - "major", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, major), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { 
- "minor", 2, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, minor), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "patch", 3, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT32, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, patch), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "tag", 4, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ - offsetof(Mgmt__BuildInfo, tag), NULL, &protobuf_c_empty_string, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned mgmt__build_info__field_indices_by_name[] = { - 0, /* field[0] = major */ - 1, /* field[1] = minor */ - 2, /* field[2] = patch */ - 3, /* field[3] = tag */ -}; -static const ProtobufCIntRange mgmt__build_info__number_ranges[1 + 1] = {{1, 0}, {0, 4}}; -const ProtobufCMessageDescriptor mgmt__build_info__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.BuildInfo", - "BuildInfo", - "Mgmt__BuildInfo", - "mgmt", - sizeof(Mgmt__BuildInfo), - 4, - mgmt__build_info__field_descriptors, - mgmt__build_info__field_indices_by_name, +static const ProtobufCFieldDescriptor mgmt__build_info__field_descriptors[4] = +{ + { + "major", 1, - mgmt__build_info__number_ranges, - (ProtobufCMessageInit)mgmt__build_info__init, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, major), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "minor", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, minor), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "patch", + 3, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, patch), NULL, NULL, - NULL /* reserved[123] */ + 0, /* flags */ + 
0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "tag", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__BuildInfo, tag), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__build_info__field_indices_by_name[] = { + 0, /* field[0] = major */ + 1, /* field[1] = minor */ + 2, /* field[2] = patch */ + 3, /* field[3] = tag */ +}; +static const ProtobufCIntRange mgmt__build_info__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 4 } +}; +const ProtobufCMessageDescriptor mgmt__build_info__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.BuildInfo", + "BuildInfo", + "Mgmt__BuildInfo", + "mgmt", + sizeof(Mgmt__BuildInfo), + 4, + mgmt__build_info__field_descriptors, + mgmt__build_info__field_indices_by_name, + 1, mgmt__build_info__number_ranges, + (ProtobufCMessageInit) mgmt__build_info__init, + NULL,NULL,NULL /* reserved[123] */ }; static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__rank_uri__field_descriptors[4] = { @@ -1727,91 +1765,147 @@ const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__rank_uri__descripto (ProtobufCMessageInit) mgmt__get_attach_info_resp__rank_uri__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descriptors[9] = { - { - "status", 1, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_INT32, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, status), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "rank_uris", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(Mgmt__GetAttachInfoResp, n_rank_uris), - offsetof(Mgmt__GetAttachInfoResp, rank_uris), - &mgmt__get_attach_info_resp__rank_uri__descriptor, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "ms_ranks", 3, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_UINT32, - 
offsetof(Mgmt__GetAttachInfoResp, n_ms_ranks), offsetof(Mgmt__GetAttachInfoResp, ms_ranks), - NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "client_net_hint", 4, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_MESSAGE, - 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, client_net_hint), &mgmt__client_net_hint__descriptor, - NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "data_version", 5, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_UINT64, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, data_version), NULL, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "sys", 6, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_STRING, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, sys), NULL, &protobuf_c_empty_string, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "secondary_rank_uris", 7, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(Mgmt__GetAttachInfoResp, n_secondary_rank_uris), - offsetof(Mgmt__GetAttachInfoResp, secondary_rank_uris), - &mgmt__get_attach_info_resp__rank_uri__descriptor, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "secondary_client_net_hints", 8, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, - offsetof(Mgmt__GetAttachInfoResp, n_secondary_client_net_hints), - offsetof(Mgmt__GetAttachInfoResp, secondary_client_net_hints), - &mgmt__client_net_hint__descriptor, NULL, 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, - { - "build_info", 9, PROTOBUF_C_LABEL_NONE, PROTOBUF_C_TYPE_MESSAGE, 0, /* quantifier_offset */ - offsetof(Mgmt__GetAttachInfoResp, build_info), &mgmt__build_info__descriptor, NULL, - 0, /* flags */ - 0, NULL, NULL /* reserved1,reserved2, etc */ - }, -}; -static const unsigned mgmt__get_attach_info_resp__field_indices_by_name[] = { - 8, /* field[8] = build_info */ - 3, /* field[3] = client_net_hint */ - 4, /* field[4] = 
data_version */ - 2, /* field[2] = ms_ranks */ - 1, /* field[1] = rank_uris */ - 7, /* field[7] = secondary_client_net_hints */ - 6, /* field[6] = secondary_rank_uris */ - 0, /* field[0] = status */ - 5, /* field[5] = sys */ -}; -static const ProtobufCIntRange mgmt__get_attach_info_resp__number_ranges[1 + 1] = {{1, 0}, {0, 9}}; -const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__descriptor = { - PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, - "mgmt.GetAttachInfoResp", - "GetAttachInfoResp", - "Mgmt__GetAttachInfoResp", - "mgmt", - sizeof(Mgmt__GetAttachInfoResp), - 9, - mgmt__get_attach_info_resp__field_descriptors, - mgmt__get_attach_info_resp__field_indices_by_name, +static const ProtobufCFieldDescriptor mgmt__get_attach_info_resp__field_descriptors[9] = +{ + { + "status", 1, - mgmt__get_attach_info_resp__number_ranges, - (ProtobufCMessageInit)mgmt__get_attach_info_resp__init, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, status), NULL, NULL, - NULL /* reserved[123] */ + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "rank_uris", + 2, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_rank_uris), + offsetof(Mgmt__GetAttachInfoResp, rank_uris), + &mgmt__get_attach_info_resp__rank_uri__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "ms_ranks", + 3, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_UINT32, + offsetof(Mgmt__GetAttachInfoResp, n_ms_ranks), + offsetof(Mgmt__GetAttachInfoResp, ms_ranks), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "client_net_hint", + 4, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, client_net_hint), + &mgmt__client_net_hint__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "data_version", + 5, + 
PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_UINT64, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, data_version), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "sys", + 6, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, sys), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "secondary_rank_uris", + 7, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_secondary_rank_uris), + offsetof(Mgmt__GetAttachInfoResp, secondary_rank_uris), + &mgmt__get_attach_info_resp__rank_uri__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "secondary_client_net_hints", + 8, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Mgmt__GetAttachInfoResp, n_secondary_client_net_hints), + offsetof(Mgmt__GetAttachInfoResp, secondary_client_net_hints), + &mgmt__client_net_hint__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "build_info", + 9, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Mgmt__GetAttachInfoResp, build_info), + &mgmt__build_info__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned mgmt__get_attach_info_resp__field_indices_by_name[] = { + 8, /* field[8] = build_info */ + 3, /* field[3] = client_net_hint */ + 4, /* field[4] = data_version */ + 2, /* field[2] = ms_ranks */ + 1, /* field[1] = rank_uris */ + 7, /* field[7] = secondary_client_net_hints */ + 6, /* field[6] = secondary_rank_uris */ + 0, /* field[0] = status */ + 5, /* field[5] = sys */ +}; +static const ProtobufCIntRange mgmt__get_attach_info_resp__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 9 } +}; +const ProtobufCMessageDescriptor mgmt__get_attach_info_resp__descriptor = +{ + 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "mgmt.GetAttachInfoResp", + "GetAttachInfoResp", + "Mgmt__GetAttachInfoResp", + "mgmt", + sizeof(Mgmt__GetAttachInfoResp), + 9, + mgmt__get_attach_info_resp__field_descriptors, + mgmt__get_attach_info_resp__field_indices_by_name, + 1, mgmt__get_attach_info_resp__number_ranges, + (ProtobufCMessageInit) mgmt__get_attach_info_resp__init, + NULL,NULL,NULL /* reserved[123] */ }; static const ProtobufCFieldDescriptor mgmt__prep_shutdown_req__field_descriptors[1] = { diff --git a/src/mgmt/svc.pb-c.h b/src/mgmt/svc.pb-c.h index 3405ed7c9556..82610110e5a6 100644 --- a/src/mgmt/svc.pb-c.h +++ b/src/mgmt/svc.pb-c.h @@ -25,7 +25,7 @@ typedef struct _Mgmt__LeaderQueryReq Mgmt__LeaderQueryReq; typedef struct _Mgmt__LeaderQueryResp Mgmt__LeaderQueryResp; typedef struct _Mgmt__GetAttachInfoReq Mgmt__GetAttachInfoReq; typedef struct _Mgmt__ClientNetHint Mgmt__ClientNetHint; -typedef struct _Mgmt__BuildInfo Mgmt__BuildInfo; +typedef struct _Mgmt__BuildInfo Mgmt__BuildInfo; typedef struct _Mgmt__GetAttachInfoResp Mgmt__GetAttachInfoResp; typedef struct _Mgmt__GetAttachInfoResp__RankUri Mgmt__GetAttachInfoResp__RankUri; typedef struct _Mgmt__PrepShutdownReq Mgmt__PrepShutdownReq; @@ -301,18 +301,19 @@ struct _Mgmt__ClientNetHint { PROTOBUF_C_MESSAGE_INIT (&mgmt__client_net_hint__descriptor) \ , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, 0, 0, 0,NULL, 0 } -struct _Mgmt__BuildInfo { + +struct _Mgmt__BuildInfo +{ ProtobufCMessage base; - uint32_t major; - uint32_t minor; - uint32_t patch; - char *tag; + uint32_t major; + uint32_t minor; + uint32_t patch; + char *tag; }; -#define MGMT__BUILD_INFO__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT(&mgmt__build_info__descriptor) \ - , 0, 0, 0, (char *)protobuf_c_empty_string \ - } +#define MGMT__BUILD_INFO__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__build_info__descriptor) \ + , 0, 0, 0, (char *)protobuf_c_empty_string } + struct 
_Mgmt__GetAttachInfoResp__RankUri { @@ -373,13 +374,12 @@ struct _Mgmt__GetAttachInfoResp /* * Structured server build information */ - Mgmt__BuildInfo *build_info; + Mgmt__BuildInfo *build_info; }; -#define MGMT__GET_ATTACH_INFO_RESP__INIT \ - { \ - PROTOBUF_C_MESSAGE_INIT(&mgmt__get_attach_info_resp__descriptor) \ - , 0, 0, NULL, 0, NULL, NULL, 0, (char *)protobuf_c_empty_string, 0, NULL, 0, NULL, NULL \ - } +#define MGMT__GET_ATTACH_INFO_RESP__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&mgmt__get_attach_info_resp__descriptor) \ + , 0, 0,NULL, 0,NULL, NULL, 0, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, NULL } + struct _Mgmt__PrepShutdownReq { @@ -662,18 +662,24 @@ void mgmt__client_net_hint__free_unpacked (Mgmt__ClientNetHint *message, ProtobufCAllocator *allocator); /* Mgmt__BuildInfo methods */ -void -mgmt__build_info__init(Mgmt__BuildInfo *message); -size_t -mgmt__build_info__get_packed_size(const Mgmt__BuildInfo *message); -size_t -mgmt__build_info__pack(const Mgmt__BuildInfo *message, uint8_t *out); -size_t -mgmt__build_info__pack_to_buffer(const Mgmt__BuildInfo *message, ProtobufCBuffer *buffer); +void mgmt__build_info__init + (Mgmt__BuildInfo *message); +size_t mgmt__build_info__get_packed_size + (const Mgmt__BuildInfo *message); +size_t mgmt__build_info__pack + (const Mgmt__BuildInfo *message, + uint8_t *out); +size_t mgmt__build_info__pack_to_buffer + (const Mgmt__BuildInfo *message, + ProtobufCBuffer *buffer); Mgmt__BuildInfo * -mgmt__build_info__unpack(ProtobufCAllocator *allocator, size_t len, const uint8_t *data); -void - mgmt__build_info__free_unpacked(Mgmt__BuildInfo *message, ProtobufCAllocator *allocator); + mgmt__build_info__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void mgmt__build_info__free_unpacked + (Mgmt__BuildInfo *message, + ProtobufCAllocator *allocator); /* Mgmt__GetAttachInfoResp__RankUri methods */ void mgmt__get_attach_info_resp__rank_uri__init (Mgmt__GetAttachInfoResp__RankUri *message); @@ 
-842,7 +848,9 @@ typedef void (*Mgmt__GetAttachInfoReq_Closure) typedef void (*Mgmt__ClientNetHint_Closure) (const Mgmt__ClientNetHint *message, void *closure_data); -typedef void (*Mgmt__BuildInfo_Closure)(const Mgmt__BuildInfo *message, void *closure_data); +typedef void (*Mgmt__BuildInfo_Closure) + (const Mgmt__BuildInfo *message, + void *closure_data); typedef void (*Mgmt__GetAttachInfoResp__RankUri_Closure) (const Mgmt__GetAttachInfoResp__RankUri *message, void *closure_data); diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index eace651e60b6..42803b0d52bc 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -280,11 +280,12 @@ daos_pool_info_t ds_mgmt_pool_query_info_out; daos_pool_info_t ds_mgmt_pool_query_info_in; void *ds_mgmt_pool_query_info_ptr; d_rank_list_t *ds_mgmt_pool_query_ranks_out; +d_rank_list_t *ds_mgmt_pool_query_suspect_ranks_out; int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **ranks, - daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver) + d_rank_list_t **suspect_ranks, daos_pool_info_t *pool_info, + uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) { /* If function is to return with an error, pool_info and ranks will not be filled. 
*/ if (ds_mgmt_pool_query_return != 0) @@ -300,6 +301,13 @@ ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **r *ranks = d_rank_list_alloc(8); /* 0-7 ; caller must free this */ ds_mgmt_pool_query_ranks_out = *ranks; } + if ((pool_info->pi_bits & DPI_ENGINES_SUSPECT) != 0) { + D_ASSERT(suspect_ranks != NULL); + + *suspect_ranks = d_rank_list_alloc(2); /* 0-1 ; caller must free this */ + ds_mgmt_pool_query_suspect_ranks_out = *suspect_ranks; + } + return ds_mgmt_pool_query_return; /* 0 */ } @@ -311,6 +319,7 @@ mock_ds_mgmt_pool_query_setup(void) ds_mgmt_pool_query_info_ptr = NULL; memset(&ds_mgmt_pool_query_info_out, 0, sizeof(daos_pool_info_t)); ds_mgmt_pool_query_ranks_out = NULL; + ds_mgmt_pool_query_suspect_ranks_out = NULL; } int ds_mgmt_pool_query_targets_return; diff --git a/src/mgmt/tests/mocks.h b/src/mgmt/tests/mocks.h index 4f4ddbc9522b..53f6af95e56e 100644 --- a/src/mgmt/tests/mocks.h +++ b/src/mgmt/tests/mocks.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2022 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -109,6 +109,7 @@ extern daos_pool_info_t ds_mgmt_pool_query_info_in; extern daos_pool_info_t ds_mgmt_pool_query_info_out; extern void *ds_mgmt_pool_query_info_ptr; extern d_rank_list_t *ds_mgmt_pool_query_ranks_out; +extern d_rank_list_t *ds_mgmt_pool_query_suspect_ranks_out; void mock_ds_mgmt_pool_query_setup(void); /* diff --git a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index 599eb8db77e0..5d22f34deb2a 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -1429,7 +1429,8 @@ test_drpc_pool_query_success(void **state) init_test_rebuild_status(&exp_info.pi_rebuild_st); ds_mgmt_pool_query_info_out = exp_info; - setup_pool_query_drpc_call(&call, TEST_UUID, DPI_ENGINES_ENABLED); + setup_pool_query_drpc_call(&call, TEST_UUID, + DPI_ENGINES_ENABLED | DPI_ENGINES_SUSPECT); ds_mgmt_drpc_pool_query(&call, &resp); @@ -1439,8 +1440,9 @@ test_drpc_pool_query_success(void **state) assert_int_equal(uuid_compare(exp_uuid, ds_mgmt_pool_query_uuid), 0); assert_non_null(ds_mgmt_pool_query_info_ptr); assert_non_null(ds_mgmt_pool_query_ranks_out); - assert_int_equal(ds_mgmt_pool_query_info_in.pi_bits, - DEFAULT_QUERY_BITS | DPI_ENGINES_ENABLED); + assert_non_null(ds_mgmt_pool_query_suspect_ranks_out); + uint64_t flags = DPI_ENGINES_ENABLED | DPI_ENGINES_SUSPECT; + assert_int_equal(ds_mgmt_pool_query_info_in.pi_bits, flags | DEFAULT_QUERY_BITS); expect_query_resp_with_info(&exp_info, MGMT__POOL_REBUILD_STATUS__STATE__IDLE, diff --git a/src/pool/srv_cli.c b/src/pool/srv_cli.c index 1decb57b3ec5..855900c0c9de 100644 --- a/src/pool/srv_cli.c +++ b/src/pool/srv_cli.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2017-2023 Intel Corporation. + * (C) Copyright 2017-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -336,6 +336,7 @@ dsc_pool_svc_call(uuid_t uuid, d_rank_list_t *ranks, struct dsc_pool_svc_call_cb struct pool_query_arg { d_rank_list_t **pqa_ranks; + d_rank_list_t **pqa_suspect_ranks; daos_pool_info_t *pqa_info; uint32_t *pqa_layout_ver; uint32_t *pqa_upgrade_layout_ver; @@ -367,13 +368,63 @@ pool_query_init(uuid_t pool_uuid, crt_rpc_t *rpc, void *varg) } static int -process_query_result(d_rank_list_t **ranks, daos_pool_info_t *info, uuid_t pool_uuid, +pool_map_get_suspect_ranks(struct pool_map *map, d_rank_list_t **ranks) +{ + crt_group_t *primary_grp; + struct pool_domain *doms; + int doms_cnt; + int i; + int rc = 0; + d_rank_list_t *rank_list = NULL; + + doms_cnt = pool_map_find_ranks(map, PO_COMP_ID_ALL, &doms); + D_ASSERT(doms_cnt >= 0); + primary_grp = crt_group_lookup(NULL); + D_ASSERT(primary_grp != NULL); + + rank_list = d_rank_list_alloc(0); + if (!rank_list) + return -DER_NOMEM; + + for (i = 0; i < doms_cnt; i++) { + struct swim_member_state state; + + if (!(doms[i].do_comp.co_status & PO_COMP_ST_UPIN)) + continue; + + rc = crt_rank_state_get(primary_grp, doms[i].do_comp.co_rank, &state); + if (rc != 0 && rc != -DER_NONEXIST) { + D_ERROR("failed to get status of rank %u: %d\n", doms[i].do_comp.co_rank, + rc); + break; + } + + D_DEBUG(DB_MD, "rank/state %d/%d\n", doms[i].do_comp.co_rank, + rc == -DER_NONEXIST ? 
-1 : state.sms_status); + if (rc == -DER_NONEXIST || state.sms_status == SWIM_MEMBER_DEAD) { + rc = d_rank_list_append(rank_list, doms[i].do_comp.co_rank); + if (rc) + D_GOTO(err, rc); + } + } +err: + if (rc == 0) + *ranks = rank_list; + else + d_rank_list_free(rank_list); + return rc; +} + +static int +process_query_result(d_rank_list_t **ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *info, uuid_t pool_uuid, uint32_t map_version, uint32_t leader_rank, struct daos_pool_space *ps, struct daos_rebuild_status *rs, struct pool_buf *map_buf) { struct pool_map *map; int rc; unsigned int num_disabled = 0; + d_rank_list_t *suspect_rank_list = NULL; rc = pool_map_create(map_buf, map_version, &map); if (rc != 0) { @@ -402,10 +453,30 @@ process_query_result(d_rank_list_t **ranks, daos_pool_info_t *info, uuid_t pool_ D_DEBUG(DB_MD, DF_UUID": found %u %s ranks in pool map\n", DP_UUID(pool_uuid), (*ranks)->rl_nr, get_enabled ? "ENABLED" : "DISABLED"); } + if (info && (info->pi_bits & DPI_ENGINES_SUSPECT) != 0) { + if (suspect_ranks == NULL) { + DL_ERROR(-DER_INVAL, + DF_UUID ": query pool requested suspect ranks, but ptr is NULL", + DP_UUID(pool_uuid)); + D_GOTO(out, rc = -DER_INVAL); + } + + rc = pool_map_get_suspect_ranks(map, &suspect_rank_list); + if (rc != 0) { + DL_ERROR(rc, DF_UUID ": pool_map_get_suspect_ranks() failed", DP_UUID(pool_uuid)); + D_GOTO(out, rc); + } + D_DEBUG(DB_MD, DF_UUID ": found %" PRIu32 " suspect ranks in pool map\n", + DP_UUID(pool_uuid), suspect_rank_list->rl_nr); + } pool_query_reply_to_info(pool_uuid, map_buf, map_version, leader_rank, ps, rs, info); out: + if (rc == 0 && suspect_rank_list != NULL) + *suspect_ranks = suspect_rank_list; + else if (rc) + d_rank_list_free(suspect_rank_list); pool_map_decref(map); return rc; } @@ -432,8 +503,8 @@ pool_query_consume(uuid_t pool_uuid, crt_rpc_t *rpc, void *varg) D_DEBUG(DB_MGMT, DF_UUID": Successfully queried pool\n", DP_UUID(pool_uuid)); - rc = process_query_result(arg->pqa_ranks, arg->pqa_info, 
pool_uuid, - out->pqo_op.po_map_version, out->pqo_op.po_hint.sh_rank, + rc = process_query_result(arg->pqa_ranks, arg->pqa_suspect_ranks, arg->pqa_info, + pool_uuid, out->pqo_op.po_map_version, out->pqo_op.po_hint.sh_rank, &out->pqo_space, &out->pqo_rebuild_st, arg->pqa_map_buf); if (arg->pqa_layout_ver) *arg->pqa_layout_ver = out->pqo_pool_layout_ver; @@ -474,6 +545,8 @@ static struct dsc_pool_svc_call_cbs pool_query_cbs = { * targets according to #pi_bits (DPI_ENGINES_ENABLED bit). * Note: ranks may be empty (i.e., *ranks->rl_nr may be 0). * The caller must free the list with d_rank_list_free(). + * \param[out] suspect_ranks Optional, suspect ranks marked as DEAD by the SWIM + * protocol, but were not excluded from the system. * \param[out] pool_info Results of the pool query * \param[out] pool_layout_ver Results of the current pool global version * \param[out] pool_upgrade_layout_ver Results of the target latest pool global version @@ -484,11 +557,13 @@ static struct dsc_pool_svc_call_cbs pool_query_cbs = { */ int dsc_pool_svc_query(uuid_t pool_uuid, d_rank_list_t *ps_ranks, uint64_t deadline, - d_rank_list_t **ranks, daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + d_rank_list_t **ranks, d_rank_list_t **suspect_ranks, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver) { struct pool_query_arg arg = { .pqa_ranks = ranks, + .pqa_suspect_ranks = suspect_ranks, .pqa_info = pool_info, .pqa_layout_ver = pool_layout_ver, .pqa_upgrade_layout_ver = upgrade_layout_ver, diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index d54ceca42802..8b6361f594d8 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -238,6 +238,7 @@ message PoolQueryResp { uint32 svc_ldr = 18; // current raft leader (2.6+) repeated uint32 svc_reps = 19; // service replica ranks uint64 query_mask = 20; // Bitmask of pool query options used + string suspect_ranks = 21; // optional set of suspect ranks } message 
PoolProperty { diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.py b/src/tests/ftest/control/dmg_pool_query_ranks.py index cb23d2c5af13..f9f1da775a90 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.py +++ b/src/tests/ftest/control/dmg_pool_query_ranks.py @@ -38,38 +38,24 @@ def test_pool_query_ranks_basic(self): """ self.log.info("Basic tests of pool query with ranks state") - self.log.debug("Checking without ranks state information") + self.log_step("Checking pool query without ranks state information") data = self.dmg.pool_query(self.pool.identifier) - self.assertIsNone( - data['response'].get('enabled_ranks'), - "Invalid enabled_ranks field: want=None, got={}".format( - data['response'].get('enabled_ranks'))) - self.assertIsNone( - data['response'].get('disabled_ranks'), - "Invalid disabled_ranks field: want=None, got={}".format( - data['response'].get('disabled_ranks'))) - - self.log.debug("Checking enabled ranks state information") + + self._verify_ranks(None, data, "enabled_ranks") + self._verify_ranks(None, data, "disabled_ranks") + + self.log_step("Checking pool query with enabled ranks state information") data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), [0, 1, 2], - "Invalid enabled_ranks field: want=[0, 1, 2], got={}".format( - data['response'].get('enabled_ranks'))) - self.assertIsNone( - data['response'].get('disabled_ranks'), - "Invalid disabled_ranks field: want=None, got={}".format( - data['response'].get('disabled_ranks'))) - - self.log.debug("Checking disabled ranks state information") + self._verify_ranks([0, 1, 2, 3, 4], data, "enabled_ranks") + self._verify_ranks(None, data, "disabled_ranks") + + self.log_step("Checking pool query with suspect ranks state information") + data = self.dmg.pool_query(self.pool.identifier, health_only=True) + self._verify_ranks([], data, "suspect_ranks") + data = self.dmg.pool_query(self.pool.identifier, 
show_disabled=True) - self.assertIsNone( - data['response'].get('enabled_ranks'), - "Invalid enabled_ranks field: want=None, got={}".format( - data['response'].get('enabled_ranks'))) - self.assertListEqual( - data['response'].get('disabled_ranks'), [], - "Invalid disabled_ranks field: want=[], got={}".format( - data['response'].get('disabled_ranks'))) + self._verify_ranks(None, data, "enabled_ranks") + self._verify_ranks([], data, "disabled_ranks") def test_pool_query_ranks_error(self): """Test that ranks state option are mutually exclusive. @@ -97,9 +83,10 @@ def test_pool_query_ranks_mgmt(self): """Test the state of ranks after excluding and reintegrate them. Test Description: - Create a pool with some engines exclude them one by one and check the consistency of the - list of enabled and disabled ranks. Then, reintegrate them and check the consistency of - the list of enabled and disabled ranks. + Create a pool with 5 engines, first excluded engine marked as "Disabled" + second stopped one as “Suspect,” restarting it, ensuring rebuild completes, + clearing the “Suspect” status, reintegrating the excluded first engine, and + finally verifying that all engines are enabled with the excluded rank now empty. 
:avocado: tags=all,daily_regression :avocado: tags=vm @@ -113,64 +100,73 @@ def test_pool_query_ranks_mgmt(self): all_ranks = enabled_ranks.copy() self.random.shuffle(all_ranks) - self.log.info("Starting excluding ranks: all_ranks=%s", all_ranks) - for rank in all_ranks: - self.log.debug("Excluding rank %d", rank) - self.pool.exclude([rank]) - enabled_ranks.remove(rank) - disabled_ranks = sorted(disabled_ranks + [rank]) - - self.log.debug("Checking enabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), enabled_ranks, - "Invalid enabled_ranks field: want={}, got={}".format( - enabled_ranks, data['response'].get('enabled_ranks'))) + exclude_rank = all_ranks[0] + suspect_rank = all_ranks[1] + self.log_step(f"Excluding pool rank:{exclude_rank} all_ranks={all_ranks}") + self.pool.exclude([exclude_rank]) + enabled_ranks.remove(exclude_rank) + disabled_ranks = sorted(disabled_ranks + [exclude_rank]) + + self.log_step("Checking enabled ranks state information") + data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) + self._verify_ranks(enabled_ranks, data, "enabled_ranks") - self.log.debug("Checking disabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_disabled=True) - self.assertListEqual( - data['response'].get('disabled_ranks'), disabled_ranks, - "Invalid disabled_ranks field: want={}, got={}".format( - disabled_ranks, data['response'].get('disabled_ranks'))) + self.log_step(f"Waiting for rebuild to start after excluding pool rank {exclude_rank}") + self.pool.wait_for_rebuild_to_start() - self.log.debug("Waiting for pool to be rebuild") - self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end() + # kill second rank. 
+ self.log_step(f"Stopping rank:{suspect_rank} all_ranks={all_ranks}") + self.server_managers[0].stop_ranks([suspect_rank], self.d_log) - self.random.shuffle(all_ranks) - self.log.info("Starting reintegrating ranks: all_ranks=%s", all_ranks) - for rank in all_ranks: - self.log.debug("Reintegrating rank %d", rank) - - cmd_succeed = False - for _ in range(3): - try: - result = self.pool.reintegrate(rank) - cmd_succeed = True - break - except CommandFailure: - self.log.debug("dmg command failed retry") - time.sleep(3) - - self.assertTrue(cmd_succeed, "pool reintegrate failed: {}".format(result)) - enabled_ranks = sorted(enabled_ranks + [rank]) - disabled_ranks.remove(rank) - - self.log.debug("Checking enabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) - self.assertListEqual( - data['response'].get('enabled_ranks'), enabled_ranks, - "Invalid enabled_ranks field: want={}, got={}".format( - enabled_ranks, data['response'].get('enabled_ranks'))) + self.log_step(f"Waiting for pool rank {suspect_rank} to be suspected") + self.pool.wait_pool_suspect_ranks([suspect_rank], timeout=30) + data = self.dmg.pool_query(self.pool.identifier, show_disabled=True) + self._verify_ranks(disabled_ranks, data, "disabled_ranks") - self.log.debug("Checking disabled ranks state information") - data = self.dmg.pool_query(self.pool.identifier, show_disabled=True) - self.assertListEqual( - data['response'].get('disabled_ranks'), disabled_ranks, - "Invalid disabled_ranks field: want={}, got={}".format( - disabled_ranks, data['response'].get('disabled_ranks'))) + self.log_step(f"Starting rank {suspect_rank}") + self.server_managers[0].start_ranks([suspect_rank], self.d_log) - self.log.debug("Waiting for pool to be rebuild") - self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end() + self.log_step("Waiting for pool ranks to no longer be suspected") + self.pool.wait_pool_suspect_ranks([], timeout=30) + + self.log_step("Waiting 
for rebuild to complete") + self.pool.wait_for_rebuild_to_end() + + self.log_step(f"Reintegrating rank {exclude_rank}") + cmd_succeed = False + for _ in range(3): + try: + self.pool.reintegrate(exclude_rank) + cmd_succeed = True + break + except CommandFailure: + self.log.debug("dmg command failed retry") + time.sleep(3) + + self.assertTrue(cmd_succeed, "pool reintegrate failed") + self.log_step(f"Waiting for rebuild to complete after reintegrating rank {exclude_rank}") + self.pool.wait_for_rebuild_to_start() + self.pool.wait_for_rebuild_to_end() + + enabled_ranks = sorted(enabled_ranks + [exclude_rank]) + disabled_ranks.remove(exclude_rank) + + self.log_step("Checking enabled ranks state information") + data = self.dmg.pool_query(self.pool.identifier, show_enabled=True) + self._verify_ranks(enabled_ranks, data, "enabled_ranks") + + def _verify_ranks(self, expect, data, key): + """Verify the expected and actual rank lists are equal. + + Args: + expect (list): list of ranks to expect + data (dict): dmg json response containing actual list of ranks + key (str): the dmg json response key used to access the actual list of ranks + """ + actual = data["response"].get(key) + if expect is None: + self.assertIsNone(actual, f"Invalid {key} field: want=None, got={actual}") + else: + self.assertListEqual( + actual, expect, f"Invalid {key} field: want={expect}, got={actual}") + self.log.debug("Check of %s passed: %s == %s", key, expect, actual) diff --git a/src/tests/ftest/control/dmg_pool_query_ranks.yaml b/src/tests/ftest/control/dmg_pool_query_ranks.yaml index 78edac3bd2ce..6e4f7eae7ea7 100644 --- a/src/tests/ftest/control/dmg_pool_query_ranks.yaml +++ b/src/tests/ftest/control/dmg_pool_query_ranks.yaml @@ -1,16 +1,21 @@ hosts: - test_servers: 3 + test_servers: 5 timeouts: test_pool_query_ranks_basic: 120 test_pool_query_ranks_error: 120 test_pool_query_ranks_mgmt: 480 server_config: name: daos_server + crt_timeout: 5 engines_per_host: 1 engines: 0: targets: 4 
nr_xs_helpers: 0 + env_vars: + - SWIM_SUSPECT_TIMEOUT=10000 + - DAOS_POOL_RF=1 + - DD_MASK=io,epc,rebuild storage: 0: class: ram @@ -19,3 +24,5 @@ server_config: pool: control_method: dmg size: 4GB + svcn: 5 + properties: rd_fac:1 diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index effc3172bac9..a34f5d74b48f 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -625,13 +625,14 @@ def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None, return data - def pool_query(self, pool, show_enabled=False, show_disabled=False): + def pool_query(self, pool, show_enabled=False, show_disabled=False, health_only=False): """Query a pool with the dmg command. Args: pool (str): Pool UUID or label to query. show_enabled (bool, optional): Display enabled ranks. show_disabled (bool, optional): Display disabled ranks. + health_only (bool, optional): Only perform pool health related queries. Raises: CommandFailure: if the dmg pool query command fails. @@ -678,7 +679,8 @@ def pool_query(self, pool, show_enabled=False, show_disabled=False): # "status": 0 # } return self._get_json_result(("pool", "query"), pool=pool, - show_enabled=show_enabled, show_disabled=show_disabled) + show_enabled=show_enabled, show_disabled=show_disabled, + health_only=health_only) def pool_query_targets(self, pool, rank=None, target_idx=None): """Call dmg pool query-targets. 
diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index 39109320af11..8691f8736314 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -534,6 +534,7 @@ def __init__(self): self.pool = BasicParameter(None, position=1) self.show_enabled = FormattedParameter("--show-enabled", False) self.show_disabled = FormattedParameter("--show-disabled", False) + self.health_only = FormattedParameter("--health-only", False) class QueryTargetsSubCommand(CommandWithParameters): """Defines an object for the dmg pool query-targets command.""" diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 7cda958d2423..46db48912200 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -539,9 +539,11 @@ def get_params(self, test): # Update the env vars with any missing or different required setting update = False - env_var_dict = {env.split("=")[0]: env.split("=")[1] for env in self.env_vars.value} + env_var_dict = { + env.split("=", maxsplit=1)[0]: env.split("=", maxsplit=1)[1] + for env in self.env_vars.value} for key in sorted(required_env_vars): - if key not in env_var_dict or env_var_dict[key] != required_env_vars[key]: + if key not in env_var_dict: env_var_dict[key] = required_env_vars[key] update = True if update: diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index fbb6484e2925..7ca6912396f6 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -1450,6 +1450,33 @@ def check_pool_files(self, hosts, uuid, scm_mount): status = False return status + def wait_pool_suspect_ranks(self, expected, interval=1, timeout=30): + """Wait for the pool suspect ranks. + + Args: + expected (list): suspect ranks check to wait. 
+ interval (int, optional): number of seconds to wait in between pool query checks + timeout (int, optional): time to fail test if it could not match + expected values. + + Raises: + DaosTestError: if the suspect ranks do not match before the timeout expires. + + """ + self.log.info("waiting for pool ranks %s to be suspected", expected) + + start = time() + data = self.dmg.pool_query(self.identifier, health_only=True) + while data['response'].get('suspect_ranks') != expected: + self.log.info(" suspect ranks is %s ...", data['response'].get('suspect_ranks')) + if time() - start > timeout: + raise DaosTestError("TIMEOUT detected after {} seconds while waiting " + "for ranks {} to be suspected".format(timeout, expected)) + sleep(interval) + data = self.dmg.pool_query(self.identifier, health_only=True) + + self.log.info("Wait for suspect ranks complete: suspect ranks %s", expected) + def verify_uuid_directory(self, host, scm_mount): """Check if pool folder exist on server.