Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-12189 chk: Include removed pools in query count #11746

Merged
merged 1 commit into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions src/control/cmd/dmg/pretty/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ package pretty
import (
"fmt"
"io"
"sort"

"github.com/dustin/go-humanize/english"

"github.com/daos-stack/daos/src/control/common"
"github.com/daos-stack/daos/src/control/lib/control"
"github.com/daos-stack/daos/src/control/lib/txtfmt"
)
Expand Down Expand Up @@ -51,16 +53,25 @@ func countResultPools(resp *control.SystemCheckQueryResp) int {
}
poolMap[pool.UUID] = struct{}{}
}
for _, report := range resp.Reports {
if report.IsRemovedPool() && report.PoolUuid != "" {
poolMap[report.PoolUuid] = struct{}{}
}
}

return len(poolMap)
}

func PrintCheckQueryResp(out io.Writer, resp *control.SystemCheckQueryResp, verbose bool) {
fmt.Fprintln(out, "DAOS System Checker Info")
if resp == nil {
fmt.Fprintln(out, " No results found.")
return
}

statusMsg := fmt.Sprintf("Current status: %s", resp.Status)
if resp.Status > control.SystemCheckStatusInit && resp.Status < control.SystemCheckStatusCompleted {
statusMsg += fmt.Sprintf(" (started at: %s)", resp.StartTime)
statusMsg += fmt.Sprintf(" (started at: %s)", common.FormatTime(resp.StartTime))
}
fmt.Fprintf(out, " %s\n", statusMsg)
fmt.Fprintf(out, " Current phase: %s (%s)\n", resp.ScanPhase, resp.ScanPhase.Description())
Expand All @@ -77,18 +88,23 @@ func PrintCheckQueryResp(out io.Writer, resp *control.SystemCheckQueryResp, verb
fmt.Fprintf(out, " %s %s\n", action, english.Plural(poolCount, "pool", ""))
}

if len(resp.Pools) > 0 {
if verbose {
fmt.Fprintln(out, "\nPer-Pool Checker Info:")
for _, pool := range resp.Pools {
fmt.Fprintf(out, " %+v\n", pool)
}
if len(resp.Pools) > 0 && verbose {
pools := make([]*control.SystemCheckPoolInfo, 0, len(resp.Pools))
for _, pool := range resp.Pools {
pools = append(pools, pool)
}
sort.Slice(pools, func(i, j int) bool {
return pools[i].UUID < pools[j].UUID
})
fmt.Fprintln(out, "\nPer-Pool Checker Info:")
for _, pool := range pools {
fmt.Fprintf(out, " %+v\n", pool)
}
fmt.Fprintln(out)
}

fmt.Fprintln(out)
if len(resp.Reports) == 0 {
fmt.Fprintln(out, "No reports to display")
fmt.Fprintln(out, "No reports to display.")
return
}

Expand Down
191 changes: 191 additions & 0 deletions src/control/cmd/dmg/pretty/check_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
//
// (C) Copyright 2023 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//

package pretty_test

import (
"bytes"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"

"github.com/daos-stack/daos/src/control/cmd/dmg/pretty"
chkpb "github.com/daos-stack/daos/src/control/common/proto/chk"
"github.com/daos-stack/daos/src/control/lib/control"
)

func TestPretty_PrintCheckQueryResp(t *testing.T) {
checkTime, err := time.Parse(time.RFC822Z, "20 Mar 23 10:07 -0500")
if err != nil {
t.Fatal(err)
}

for name, tc := range map[string]struct {
resp *control.SystemCheckQueryResp
verbose bool
expOut string
}{
"empty": {
expOut: `
DAOS System Checker Info
No results found.
`,
},
"(verbose) 2 pools being checked": {
resp: &control.SystemCheckQueryResp{
Status: control.SystemCheckStatusRunning,
ScanPhase: control.SystemCheckScanPhaseContainerList,
StartTime: checkTime,
Pools: map[string]*control.SystemCheckPoolInfo{
"pool-1": {
UUID: "pool-1",
Status: chkpb.CheckPoolStatus_CPS_CHECKING.String(),
Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(),
StartTime: checkTime,
},
"pool-2": {
UUID: "pool-2",
Status: chkpb.CheckPoolStatus_CPS_CHECKING.String(),
Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(),
StartTime: checkTime,
},
},
},
verbose: true,
expOut: `
DAOS System Checker Info
Current status: RUNNING (started at: 2023-03-20T10:07:00.000-05:00)
Current phase: CONT_LIST (Comparing container list on PS and storage nodes)
Checking 2 pools

Per-Pool Checker Info:
Pool pool-1: 0 ranks, status: CPS_CHECKING, phase: CSP_PREPARE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-2: 0 ranks, status: CPS_CHECKING, phase: CSP_PREPARE, started: 2023-03-20T10:07:00.000-05:00

No reports to display.
`,
},
"(verbose) 3 pools repaired; 1 unchecked; 1 removed": {
resp: &control.SystemCheckQueryResp{
Status: control.SystemCheckStatusCompleted,
ScanPhase: control.SystemCheckScanPhaseDone,
Pools: map[string]*control.SystemCheckPoolInfo{
"pool-1": {
UUID: "pool-1",
Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_DONE.String(),
StartTime: checkTime,
},
"pool-2": {
UUID: "pool-2",
Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_DONE.String(),
StartTime: checkTime,
},
"pool-3": {
UUID: "pool-3",
Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_DONE.String(),
StartTime: checkTime,
},
"pool-5": {
UUID: "pool-5",
Status: chkpb.CheckPoolStatus_CPS_UNCHECKED.String(),
Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(),
},
},
Reports: []*control.SystemCheckReport{
{
CheckReport: chkpb.CheckReport{
Seq: 1,
Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_SVCL,
Action: chkpb.CheckInconsistAction_CIA_IGNORE,
Msg: "message 1",
PoolUuid: "pool-1",
},
},
{
CheckReport: chkpb.CheckReport{
Seq: 2,
Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL,
Action: chkpb.CheckInconsistAction_CIA_TRUST_MS,
Msg: "message 2",
PoolUuid: "pool-2",
},
},
{
CheckReport: chkpb.CheckReport{
Seq: 3,
Class: chkpb.CheckInconsistClass_CIC_POOL_LESS_SVC_WITHOUT_QUORUM,
Action: chkpb.CheckInconsistAction_CIA_TRUST_PS,
Msg: "message 3",
PoolUuid: "pool-3",
},
},
{
CheckReport: chkpb.CheckReport{
Seq: 4,
Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE,
Action: chkpb.CheckInconsistAction_CIA_DISCARD,
Msg: "message 4",
PoolUuid: "pool-4",
},
},
},
},
verbose: true,
expOut: `
DAOS System Checker Info
Current status: COMPLETED
Current phase: DSP_DONE (Check completed)
Checked 4 pools

Per-Pool Checker Info:
Pool pool-1: 0 ranks, status: CPS_CHECKED, phase: DSP_DONE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-2: 0 ranks, status: CPS_CHECKED, phase: DSP_DONE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-3: 0 ranks, status: CPS_CHECKED, phase: DSP_DONE, started: 2023-03-20T10:07:00.000-05:00
Pool pool-5: 0 ranks, status: CPS_UNCHECKED, phase: CSP_PREPARE

Inconsistency Reports:
ID: 0x1
Class: POOL_BAD_SVCL
Message: message 1
Pool: pool-1
Resolution: IGNORE

ID: 0x2
Class: POOL_BAD_LABEL
Message: message 2
Pool: pool-2
Resolution: TRUST_MS

ID: 0x3
Class: POOL_LESS_SVC_WITHOUT_QUORUM
Message: message 3
Pool: pool-3
Resolution: TRUST_PS

ID: 0x4
Class: POOL_NONEXIST_ON_ENGINE
Message: message 4
Pool: pool-4
Resolution: DISCARD

`,
},
} {
t.Run(name, func(t *testing.T) {
var buf bytes.Buffer
pretty.PrintCheckQueryResp(&buf, tc.resp, tc.verbose)
got := buf.String()
if diff := cmp.Diff(strings.TrimLeft(tc.expOut, "\n"), got); diff != "" {
t.Fatalf("unexpected output (-want, +got):\n%s", diff)
}
})
}
}
50 changes: 45 additions & 5 deletions src/control/lib/control/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package control

import (
"context"
"encoding/json"
"fmt"
"sort"
"strings"
Expand All @@ -17,6 +18,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/protobuf/proto"

"github.com/daos-stack/daos/src/control/common"
pbutil "github.com/daos-stack/daos/src/control/common/proto"
chkpb "github.com/daos-stack/daos/src/control/common/proto/chk"
mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt"
Expand Down Expand Up @@ -422,6 +424,12 @@ func (r *SystemCheckReport) IsInteractive() bool {
return r.Action == chkpb.CheckInconsistAction_CIA_INTERACT
}

func (r *SystemCheckReport) IsRemovedPool() bool {
return r.Action == chkpb.CheckInconsistAction_CIA_DISCARD &&
(r.Class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE ||
r.Class == chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS)
}

func (r *SystemCheckReport) Resolution() string {
msg := SystemCheckRepairAction(r.Action).String()
if len(r.ActMsgs) == 1 {
Expand All @@ -438,9 +446,26 @@ type SystemCheckPoolInfo struct {
Label string `json:"label"`
Status string `json:"status"`
Phase string `json:"phase"`
StartTime time.Time `json:"start_time"`
Remaining time.Duration `json:"remaining"`
Elapsed time.Duration `json:"elapsed"`
StartTime time.Time `json:"-"`
Remaining time.Duration `json:"-"`
Elapsed time.Duration `json:"-"`
}

func (p *SystemCheckPoolInfo) MarshalJSON() ([]byte, error) {
type toJSON SystemCheckPoolInfo
return json.Marshal(&struct {
*toJSON
RankCount int `json:"rank_count"`
StartTime string `json:"start_time"`
Remaining float64 `json:"remaining"`
Elapsed float64 `json:"elapsed"`
}{
toJSON: (*toJSON)(p),
RankCount: len(p.RawRankInfo),
StartTime: common.FormatTime(p.StartTime),
Remaining: p.Remaining.Seconds(),
Elapsed: p.Elapsed.Seconds(),
})
}

func (p *SystemCheckPoolInfo) String() string {
Expand All @@ -450,8 +475,12 @@ func (p *SystemCheckPoolInfo) String() string {
} else if p.Remaining > 0 {
remOrElapsed = fmt.Sprintf(" remaining: %s", p.Remaining)
}
return fmt.Sprintf("Pool %s: %d ranks, status: %s, phase: %s, started: %s%s",
p.UUID, len(p.RawRankInfo), p.Status, p.Phase, p.StartTime, remOrElapsed)
timeStr := ""
if !p.StartTime.IsZero() {
timeStr = fmt.Sprintf(", started: %s%s", common.FormatTime(p.StartTime), remOrElapsed)
}
return fmt.Sprintf("Pool %s: %d ranks, status: %s, phase: %s%s",
p.UUID, len(p.RawRankInfo), p.Status, p.Phase, timeStr)
}

func (p *SystemCheckPoolInfo) Unchecked() bool {
Expand Down Expand Up @@ -507,6 +536,17 @@ type SystemCheckQueryResp struct {
Reports []*SystemCheckReport `json:"reports"`
}

func (r *SystemCheckQueryResp) MarshalJSON() ([]byte, error) {
type toJSON SystemCheckQueryResp
return json.Marshal(struct {
StartTime string `json:"start_time"`
*toJSON
}{
StartTime: common.FormatTime(r.StartTime),
toJSON: (*toJSON)(r),
})
}

// SystemCheckQuery queries the system checker status.
func SystemCheckQuery(ctx context.Context, rpcClient UnaryInvoker, req *SystemCheckQueryReq) (*SystemCheckQueryResp, error) {
if req == nil {
Expand Down