Skip to content

Commit

Permalink
DAOS-9799 chk: Add fault injection capability for MS checker
Browse files Browse the repository at this point in the history
Add a new `dmg faults add-checker-report` command to allow
manual injection of checker reports for prototyping and testing.

Signed-off-by: Michael MacDonald <[email protected]>
  • Loading branch information
mjmac committed Apr 19, 2022
1 parent 8384076 commit 65b0a7b
Show file tree
Hide file tree
Showing 11 changed files with 497 additions and 206 deletions.
2 changes: 2 additions & 0 deletions src/control/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def get_build_tags(benv):
tags.append("firmware")
if is_ucx_build(benv):
tags.append("ucx")
if not is_release_build(benv):
tags.append("fault_injection")
if len(tags) == 0:
return ""
return "-tags {}".format(','.join(tags))
Expand Down
152 changes: 152 additions & 0 deletions src/control/cmd/dmg/fi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
//
// (C) Copyright 2019-2022 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
//go:build fault_injection
// +build fault_injection

package main

import (
"context"
"encoding/json"
"io/ioutil"
"math/rand"
"strconv"
"strings"
"time"

"github.com/google/uuid"
"github.com/jessevdk/go-flags"
"github.com/pkg/errors"
"google.golang.org/grpc"
"google.golang.org/protobuf/proto"

"github.com/daos-stack/daos/src/control/common/cmdutil"
chkpb "github.com/daos-stack/daos/src/control/common/proto/chk"
mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt"
"github.com/daos-stack/daos/src/control/lib/control"
"github.com/daos-stack/daos/src/control/system/checker"
)

// faultsCmdRoot is the root of the fault injection command tree. This
// definition is only compiled when the fault_injection build tag is set.
type faultsCmdRoot struct {
Faults faultCmd `command:"faults" description:"Inject system fault"`
}

// faultCmd groups the subcommands available under the "faults" command.
type faultCmd struct {
AddCheckerReport addCheckerReportCmd `command:"add-checker-report" description:"Add a system checker report"`
}

// chkRptCls is a local type over chkpb.CheckInconsistClass that carries the
// String, Complete, and UnmarshalFlag methods used when it is a flag value.
type chkRptCls chkpb.CheckInconsistClass

// String returns the name registered for this class in
// chkpb.CheckInconsistClass_name. A class with no entry in that map
// yields the empty string.
func (c chkRptCls) String() string {
	name := chkpb.CheckInconsistClass_name[int32(c)]
	return name
}

// Complete returns a completion item for every class name in
// chkpb.CheckInconsistClass_name that begins with match. The names are
// gathered by ranging over a map, so their order is not stable.
func (c chkRptCls) Complete(match string) (comps []flags.Completion) {
	for _, name := range chkpb.CheckInconsistClass_name {
		if !strings.HasPrefix(name, match) {
			continue
		}
		comps = append(comps, flags.Completion{Item: name})
	}
	return comps
}

// UnmarshalFlag sets the class from a command-line flag value. The value
// may be either a class name or the decimal number of a class, as listed
// in chkpb.CheckInconsistClass_name. Any other value results in an
// "invalid class" error.
func (c *chkRptCls) UnmarshalFlag(value string) error {
	for num, name := range chkpb.CheckInconsistClass_name {
		if name == value {
			*c = chkRptCls(num)
			return nil
		}
	}

	// Parse with an explicit 32-bit size so that out-of-range input is
	// rejected by the parser. Parsing into a platform-sized int and then
	// narrowing to int32 would let a large number wrap around onto a
	// valid class number and be accepted by mistake.
	if num, err := strconv.ParseInt(value, 10, 32); err == nil {
		if _, found := chkpb.CheckInconsistClass_name[int32(num)]; found {
			*c = chkRptCls(num)
			return nil
		}
	}
	return errors.Errorf("invalid class %s", value)
}

// addCheckerReportCmd implements the "add-checker-report" subcommand. The
// report to inject comes either from a JSON file (File) or from a canned
// report selected by class (Class).
type addCheckerReportCmd struct {
cmdutil.LogCmd
cfgCmd
ctlInvokerCmd
jsonOutputCmd

// File, when non-empty, takes precedence over Class in Execute.
File string `short:"f" long:"file" description:"File containing checker report in JSON format"`
Class chkRptCls `short:"c" long:"class" description:"Checker report class (canned reports)"`
}

// Execute implements the add-checker-report command. The report is either
// parsed from the JSON file named by --file or, when no file is given, built
// from a canned template selected by --class. It is then submitted through
// the FaultInjectReport RPC of the management service client.
//
// The value returned by this method is passed through errors.Wrap with the
// message "add checker finding" by the deferred function.
func (cmd *addCheckerReportCmd) Execute(_ []string) (errOut error) {
	defer func() {
		errOut = errors.Wrap(errOut, "add checker finding")
	}()

	var rpt *chkpb.CheckReport
	if cmd.File == "" {
		rand.Seed(time.Now().UnixNano())

		// Canned reports are selected by class and are meant for
		// prototyping and testing. For full control over the report
		// contents, write it as JSON and load it with --file instead.
		cls := chkpb.CheckInconsistClass(cmd.Class)
		switch cls {
		case chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL:
			rpt = &chkpb.CheckReport{
				Seq:      rand.Uint64(),
				Class:    cls,
				Action:   chkpb.CheckInconsistAction_CIA_INTERACT,
				PoolUuid: uuid.New().String(),
				Actions: []chkpb.CheckInconsistAction{
					chkpb.CheckInconsistAction_CIA_TRUST_MS,
					chkpb.CheckInconsistAction_CIA_TRUST_PS,
					chkpb.CheckInconsistAction_CIA_IGNORE,
				},
				Details: []string{"ms-label", "ps-label"},
			}
		default:
			return errors.Errorf("no canned report for class: %s", cls)
		}

		// Only canned reports are annotated (for nicer messages);
		// reports loaded from a file are used exactly as provided.
		finding := checker.AnnotateFinding(checker.NewFinding(rpt))
		rpt = &finding.CheckReport
	} else {
		data, err := ioutil.ReadFile(cmd.File)
		if err != nil {
			return errors.Wrapf(err, "failed to open file %s", cmd.File)
		}

		rpt = new(chkpb.CheckReport)
		if err := json.Unmarshal(data, rpt); err != nil {
			return errors.Wrapf(err, "failed to parse file %s", cmd.File)
		}
	}

	// A report whose class is still CIC_NONE is rejected before any RPC.
	if rpt.Class == chkpb.CheckInconsistClass_CIC_NONE {
		return errors.New("class must be set")
	}

	injectReport := func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) {
		cmd.Debugf("injecting checker report: %+v", rpt)
		return mgmtpb.NewMgmtSvcClient(conn).FaultInjectReport(ctx, rpt)
	}
	resp, err := control.InvokeFaultRPC(context.Background(), cmd.ctlInvoker, injectReport)

	// In JSON mode both the response and any RPC error are handed to the
	// JSON output routine rather than being reported here.
	if cmd.jsonOutputEnabled() {
		return cmd.outputJSON(resp, err)
	}

	if err == nil {
		cmd.Info("Checker report added")
	}
	return err
}
11 changes: 11 additions & 0 deletions src/control/cmd/dmg/fi_disabled.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//
// (C) Copyright 2019-2022 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
//go:build !fault_injection
// +build !fault_injection

package main

// faultsCmdRoot is an empty placeholder used when the fault_injection build
// tag is not set, so that no fault injection commands are defined.
type faultsCmdRoot struct{}
35 changes: 18 additions & 17 deletions src/control/cmd/dmg/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,23 +148,24 @@ func (c *cfgCmd) setConfig(cfg *control.Config) {
}

// cliOptions defines the top-level dmg command-line flags and subcommands.
type cliOptions struct {
AllowProxy bool `long:"allow-proxy" description:"Allow proxy configuration via environment"`
HostList string `short:"l" long:"host-list" description:"A comma separated list of addresses <ipv4addr/hostname> to connect to"`
Insecure bool `short:"i" long:"insecure" description:"Have dmg attempt to connect without certificates"`
Debug bool `short:"d" long:"debug" description:"Enable debug output"`
JSON bool `short:"j" long:"json" description:"Enable JSON output"`
JSONLogs bool `short:"J" long:"json-logging" description:"Enable JSON-formatted log output"`
ConfigPath string `short:"o" long:"config-path" description:"Client config file path"`
Server serverCmd `command:"server" alias:"srv" description:"Perform tasks related to remote servers"`
Storage storageCmd `command:"storage" alias:"sto" description:"Perform tasks related to storage attached to remote servers"`
Config configCmd `command:"config" alias:"cfg" description:"Perform tasks related to configuration of hardware on remote servers"`
System SystemCmd `command:"system" alias:"sys" description:"Perform distributed tasks related to DAOS system"`
Network NetCmd `command:"network" alias:"net" description:"Perform tasks related to network devices attached to remote servers"`
Pool PoolCmd `command:"pool" description:"Perform tasks related to DAOS pools"`
Cont ContCmd `command:"container" alias:"cont" description:"Perform tasks related to DAOS containers"`
Version versionCmd `command:"version" description:"Print dmg version"`
Telemetry telemCmd `command:"telemetry" alias:"telem" description:"Perform telemetry operations"`
Check checkCmdRoot `command:"check" description:"Check system health"`
AllowProxy bool `long:"allow-proxy" description:"Allow proxy configuration via environment"`
HostList string `short:"l" long:"host-list" description:"A comma separated list of addresses <ipv4addr/hostname> to connect to"`
Insecure bool `short:"i" long:"insecure" description:"Have dmg attempt to connect without certificates"`
Debug bool `short:"d" long:"debug" description:"Enable debug output"`
JSON bool `short:"j" long:"json" description:"Enable JSON output"`
JSONLogs bool `short:"J" long:"json-logging" description:"Enable JSON-formatted log output"`
ConfigPath string `short:"o" long:"config-path" description:"Client config file path"`
Server serverCmd `command:"server" alias:"srv" description:"Perform tasks related to remote servers"`
Storage storageCmd `command:"storage" alias:"sto" description:"Perform tasks related to storage attached to remote servers"`
Config configCmd `command:"config" alias:"cfg" description:"Perform tasks related to configuration of hardware on remote servers"`
System SystemCmd `command:"system" alias:"sys" description:"Perform distributed tasks related to DAOS system"`
Network NetCmd `command:"network" alias:"net" description:"Perform tasks related to network devices attached to remote servers"`
Pool PoolCmd `command:"pool" description:"Perform tasks related to DAOS pools"`
Cont ContCmd `command:"container" alias:"cont" description:"Perform tasks related to DAOS containers"`
Version versionCmd `command:"version" description:"Print dmg version"`
Telemetry telemCmd `command:"telemetry" alias:"telem" description:"Perform telemetry operations"`
Check checkCmdRoot `command:"check" description:"Check system health"`
// faultsCmdRoot contributes the "faults" command when built with the
// fault_injection tag; without that tag it is an empty struct.
faultsCmdRoot
firmwareOption // build with tag "firmware" to enable
ManPage cmdutil.ManCmd `command:"manpage" hidden:"true"`
}
Expand Down
Loading

0 comments on commit 65b0a7b

Please sign in to comment.