Skip to content

Commit

Permalink
DAOS-12189 chk: Include removed pools in query count (#11746)
Browse files Browse the repository at this point in the history
When a pool has been removed, it will no longer appear
in the pools list for the response. We can still detect
that a pool was removed by checking the reports list,
however. If we find a removed pool, include it in the count
of checked pools when displaying the query results.

Includes misc. improvements to JSON output as well.

Signed-off-by: Michael MacDonald <[email protected]>
  • Loading branch information
mjmac authored Mar 21, 2023
1 parent a3b5964 commit 7c5e443
Show file tree
Hide file tree
Showing 3 changed files with 261 additions and 14 deletions.
34 changes: 25 additions & 9 deletions src/control/cmd/dmg/pretty/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ package pretty
import (
"fmt"
"io"
"sort"

"github.com/dustin/go-humanize/english"

"github.com/daos-stack/daos/src/control/common"
"github.com/daos-stack/daos/src/control/lib/control"
"github.com/daos-stack/daos/src/control/lib/txtfmt"
)
Expand Down Expand Up @@ -51,16 +53,25 @@ func countResultPools(resp *control.SystemCheckQueryResp) int {
}
poolMap[pool.UUID] = struct{}{}
}
for _, report := range resp.Reports {
if report.IsRemovedPool() && report.PoolUuid != "" {
poolMap[report.PoolUuid] = struct{}{}
}
}

return len(poolMap)
}

func PrintCheckQueryResp(out io.Writer, resp *control.SystemCheckQueryResp, verbose bool) {
fmt.Fprintln(out, "DAOS System Checker Info")
if resp == nil {
fmt.Fprintln(out, " No results found.")
return
}

statusMsg := fmt.Sprintf("Current status: %s", resp.Status)
if resp.Status > control.SystemCheckStatusInit && resp.Status < control.SystemCheckStatusCompleted {
statusMsg += fmt.Sprintf(" (started at: %s)", resp.StartTime)
statusMsg += fmt.Sprintf(" (started at: %s)", common.FormatTime(resp.StartTime))
}
fmt.Fprintf(out, " %s\n", statusMsg)
fmt.Fprintf(out, " Current phase: %s (%s)\n", resp.ScanPhase, resp.ScanPhase.Description())
Expand All @@ -77,18 +88,23 @@ func PrintCheckQueryResp(out io.Writer, resp *control.SystemCheckQueryResp, verb
fmt.Fprintf(out, " %s %s\n", action, english.Plural(poolCount, "pool", ""))
}

if len(resp.Pools) > 0 {
if verbose {
fmt.Fprintln(out, "\nPer-Pool Checker Info:")
for _, pool := range resp.Pools {
fmt.Fprintf(out, " %+v\n", pool)
}
if len(resp.Pools) > 0 && verbose {
pools := make([]*control.SystemCheckPoolInfo, 0, len(resp.Pools))
for _, pool := range resp.Pools {
pools = append(pools, pool)
}
sort.Slice(pools, func(i, j int) bool {
return pools[i].UUID < pools[j].UUID
})
fmt.Fprintln(out, "\nPer-Pool Checker Info:")
for _, pool := range pools {
fmt.Fprintf(out, " %+v\n", pool)
}
fmt.Fprintln(out)
}

fmt.Fprintln(out)
if len(resp.Reports) == 0 {
fmt.Fprintln(out, "No reports to display")
fmt.Fprintln(out, "No reports to display.")
return
}

Expand Down
191 changes: 191 additions & 0 deletions src/control/cmd/dmg/pretty/check_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
//
// (C) Copyright 2023 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//

package pretty_test

import (
"bytes"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"

"github.com/daos-stack/daos/src/control/cmd/dmg/pretty"
chkpb "github.com/daos-stack/daos/src/control/common/proto/chk"
"github.com/daos-stack/daos/src/control/lib/control"
)

func TestPretty_PrintCheckQueryResp(t *testing.T) {
checkTime, err := time.Parse(time.RFC822Z, "20 Mar 23 10:07 -0500")
if err != nil {
t.Fatal(err)
}

for name, tc := range map[string]struct {
resp *control.SystemCheckQueryResp
verbose bool
expOut string
}{
"empty": {
expOut: `
DAOS System Checker Info
No results found.
`,
},
"(verbose) 2 pools being checked": {
resp: &control.SystemCheckQueryResp{
Status: control.SystemCheckStatusRunning,
ScanPhase: control.SystemCheckScanPhaseContainerList,
StartTime: checkTime,
Pools: map[string]*control.SystemCheckPoolInfo{
"pool-1": {
UUID: "pool-1",
Status: chkpb.CheckPoolStatus_CPS_CHECKING.String(),
Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(),
StartTime: checkTime,
},
"pool-2": {
UUID: "pool-2",
Status: chkpb.CheckPoolStatus_CPS_CHECKING.String(),
Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(),
StartTime: checkTime,
},
},
},
verbose: true,
expOut: `
DAOS System Checker Info
Current status: RUNNING (started at: 2023-03-20T10:07:00.000-05:00)
Current phase: CONT_LIST (Comparing container list on PS and storage nodes)
Checking 2 pools
Per-Pool Checker Info:
Pool pool-1: 0 ranks, status: CPS_CHECKING, phase: CSP_PREPARE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-2: 0 ranks, status: CPS_CHECKING, phase: CSP_PREPARE, started: 2023-03-20T10:07:00.000-05:00
No reports to display.
`,
},
"(verbose) 3 pools repaired; 1 unchecked; 1 removed": {
resp: &control.SystemCheckQueryResp{
Status: control.SystemCheckStatusCompleted,
ScanPhase: control.SystemCheckScanPhaseDone,
Pools: map[string]*control.SystemCheckPoolInfo{
"pool-1": {
UUID: "pool-1",
Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_DONE.String(),
StartTime: checkTime,
},
"pool-2": {
UUID: "pool-2",
Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_DONE.String(),
StartTime: checkTime,
},
"pool-3": {
UUID: "pool-3",
Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_DONE.String(),
StartTime: checkTime,
},
"pool-5": {
UUID: "pool-5",
Status: chkpb.CheckPoolStatus_CPS_UNCHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(),
},
},
Reports: []*control.SystemCheckReport{
{
CheckReport: chkpb.CheckReport{
Seq: 1,
Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_SVCL,
Action: chkpb.CheckInconsistAction_CIA_IGNORE,
Msg: "message 1",
PoolUuid: "pool-1",
},
},
{
CheckReport: chkpb.CheckReport{
Seq: 2,
Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL,
Action: chkpb.CheckInconsistAction_CIA_TRUST_MS,
Msg: "message 2",
PoolUuid: "pool-2",
},
},
{
CheckReport: chkpb.CheckReport{
Seq: 3,
Class: chkpb.CheckInconsistClass_CIC_POOL_LESS_SVC_WITHOUT_QUORUM,
Action: chkpb.CheckInconsistAction_CIA_TRUST_PS,
Msg: "message 3",
PoolUuid: "pool-3",
},
},
{
CheckReport: chkpb.CheckReport{
Seq: 4,
Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE,
Action: chkpb.CheckInconsistAction_CIA_DISCARD,
Msg: "message 4",
PoolUuid: "pool-4",
},
},
},
},
verbose: true,
expOut: `
DAOS System Checker Info
Current status: COMPLETED
Current phase: DSP_DONE (Check completed)
Checked 4 pools
Per-Pool Checker Info:
Pool pool-1: 0 ranks, status: CPS_CHECKED, phase: DSP_DONE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-2: 0 ranks, status: CPS_CHECKED, phase: DSP_DONE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-3: 0 ranks, status: CPS_CHECKED, phase: DSP_DONE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-5: 0 ranks, status: CPS_UNCHECKED, phase: CSP_PREPARE
Inconsistency Reports:
ID: 0x1
Class: POOL_BAD_SVCL
Message: message 1
Pool: pool-1
Resolution: IGNORE
ID: 0x2
Class: POOL_BAD_LABEL
Message: message 2
Pool: pool-2
Resolution: TRUST_MS
ID: 0x3
Class: POOL_LESS_SVC_WITHOUT_QUORUM
Message: message 3
Pool: pool-3
Resolution: TRUST_PS
ID: 0x4
Class: POOL_NONEXIST_ON_ENGINE
Message: message 4
Pool: pool-4
Resolution: DISCARD
`,
},
} {
t.Run(name, func(t *testing.T) {
var buf bytes.Buffer
pretty.PrintCheckQueryResp(&buf, tc.resp, tc.verbose)
got := buf.String()
if diff := cmp.Diff(strings.TrimLeft(tc.expOut, "\n"), got); diff != "" {
t.Fatalf("unexpected output (-want, +got):\n%s", diff)
}
})
}
}
50 changes: 45 additions & 5 deletions src/control/lib/control/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package control

import (
"context"
"encoding/json"
"fmt"
"sort"
"strings"
Expand All @@ -17,6 +18,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/protobuf/proto"

"github.com/daos-stack/daos/src/control/common"
pbutil "github.com/daos-stack/daos/src/control/common/proto"
chkpb "github.com/daos-stack/daos/src/control/common/proto/chk"
mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt"
Expand Down Expand Up @@ -422,6 +424,12 @@ func (r *SystemCheckReport) IsInteractive() bool {
return r.Action == chkpb.CheckInconsistAction_CIA_INTERACT
}

func (r *SystemCheckReport) IsRemovedPool() bool {
return r.Action == chkpb.CheckInconsistAction_CIA_DISCARD &&
(r.Class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE ||
r.Class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS)
}

func (r *SystemCheckReport) Resolution() string {
msg := SystemCheckRepairAction(r.Action).String()
if len(r.ActMsgs) == 1 {
Expand All @@ -438,9 +446,26 @@ type SystemCheckPoolInfo struct {
Label string `json:"label"`
Status string `json:"status"`
Phase string `json:"phase"`
StartTime time.Time `json:"start_time"`
Remaining time.Duration `json:"remaining"`
Elapsed time.Duration `json:"elapsed"`
StartTime time.Time `json:"-"`
Remaining time.Duration `json:"-"`
Elapsed time.Duration `json:"-"`
}

func (p *SystemCheckPoolInfo) MarshalJSON() ([]byte, error) {
type toJSON SystemCheckPoolInfo
return json.Marshal(&struct {
*toJSON
RankCount int `json:"rank_count"`
StartTime string `json:"start_time"`
Remaining float64 `json:"remaining"`
Elapsed float64 `json:"elapsed"`
}{
toJSON: (*toJSON)(p),
RankCount: len(p.RawRankInfo),
StartTime: common.FormatTime(p.StartTime),
Remaining: p.Remaining.Seconds(),
Elapsed: p.Elapsed.Seconds(),
})
}

func (p *SystemCheckPoolInfo) String() string {
Expand All @@ -450,8 +475,12 @@ func (p *SystemCheckPoolInfo) String() string {
} else if p.Remaining > 0 {
remOrElapsed = fmt.Sprintf(" remaining: %s", p.Remaining)
}
return fmt.Sprintf("Pool %s: %d ranks, status: %s, phase: %s, started: %s%s",
p.UUID, len(p.RawRankInfo), p.Status, p.Phase, p.StartTime, remOrElapsed)
timeStr := ""
if !p.StartTime.IsZero() {
timeStr = fmt.Sprintf(", started: %s%s", common.FormatTime(p.StartTime), remOrElapsed)
}
return fmt.Sprintf("Pool %s: %d ranks, status: %s, phase: %s%s",
p.UUID, len(p.RawRankInfo), p.Status, p.Phase, timeStr)
}

func (p *SystemCheckPoolInfo) Unchecked() bool {
Expand Down Expand Up @@ -507,6 +536,17 @@ type SystemCheckQueryResp struct {
Reports []*SystemCheckReport `json:"reports"`
}

func (r *SystemCheckQueryResp) MarshalJSON() ([]byte, error) {
type toJSON SystemCheckQueryResp
return json.Marshal(struct {
StartTime string `json:"start_time"`
*toJSON
}{
StartTime: common.FormatTime(r.StartTime),
toJSON: (*toJSON)(r),
})
}

// SystemCheckQuery queries the system checker status.
func SystemCheckQuery(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckQueryReq) (*SystemCheckQueryResp, error) {
if req == nil {
Expand Down

0 comments on commit 7c5e443

Please sign in to comment.