Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-9799 chk: Add fault injection capability for MS checker #8685

Merged
merged 1 commit into from
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/control/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def get_build_tags(benv):
tags.append("firmware")
if is_ucx_build(benv):
tags.append("ucx")
if not is_release_build(benv):
tags.append("fault_injection")
if len(tags) == 0:
return ""
return "-tags {}".format(','.join(tags))
Expand Down
152 changes: 152 additions & 0 deletions src/control/cmd/dmg/fi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
//
// (C) Copyright 2019-2022 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
//go:build fault_injection
// +build fault_injection

package main

import (
"context"
"encoding/json"
"io/ioutil"
"math/rand"
"strconv"
"strings"
"time"

"github.com/google/uuid"
"github.com/jessevdk/go-flags"
"github.com/pkg/errors"
"google.golang.org/grpc"
"google.golang.org/protobuf/proto"

"github.com/daos-stack/daos/src/control/common/cmdutil"
chkpb "github.com/daos-stack/daos/src/control/common/proto/chk"
mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt"
"github.com/daos-stack/daos/src/control/lib/control"
"github.com/daos-stack/daos/src/control/system/checker"
)

// faultsCmdRoot holds the top-level "faults" subcommand. This definition
// lives in a file guarded by the fault_injection build tag, so the
// subcommand only exists in builds made with that tag.
type faultsCmdRoot struct {
Faults faultCmd `command:"faults" description:"Inject system fault"`
}

// faultCmd groups the fault-injection subcommands available under "faults".
type faultCmd struct {
AddCheckerReport addCheckerReportCmd `command:"add-checker-report" description:"Add a system checker report"`
}

// chkRptCls is a local alias type for chkpb.CheckInconsistClass so that
// command-line flag helper methods (String, Complete, UnmarshalFlag) can be
// attached to it.
type chkRptCls chkpb.CheckInconsistClass

// String returns the name registered for the class in
// chkpb.CheckInconsistClass_name. A value with no entry in that map yields
// the empty string.
func (c chkRptCls) String() string {
	name := chkpb.CheckInconsistClass_name[int32(c)]
	return name
}

// Complete returns a completion item for every class name in
// chkpb.CheckInconsistClass_name that begins with match. The receiver value
// is not consulted. Candidates are collected in map iteration order.
func (c chkRptCls) Complete(match string) (comps []flags.Completion) {
	for _, name := range chkpb.CheckInconsistClass_name {
		if !strings.HasPrefix(name, match) {
			continue
		}
		comps = append(comps, flags.Completion{Item: name})
	}
	return comps
}

// UnmarshalFlag sets the receiver from a command-line flag value. The value
// may be either a class name exactly as it appears in
// chkpb.CheckInconsistClass_name, or the decimal number of a class that has
// an entry in that map. Any other input returns an "invalid class" error and
// leaves the receiver untouched.
func (c *chkRptCls) UnmarshalFlag(value string) error {
	// First try an exact match on the symbolic name.
	for i, v := range chkpb.CheckInconsistClass_name {
		if v == value {
			*c = chkRptCls(i)
			return nil
		}
	}

	// Fall back to a numeric class. Parsing with a 32-bit size makes
	// out-of-range input fail here instead of being silently truncated by
	// the int32 conversion below (e.g. "4294967296" wrapping around to 0
	// and being accepted as a valid class).
	if v, err := strconv.ParseInt(value, 10, 32); err == nil {
		if _, found := chkpb.CheckInconsistClass_name[int32(v)]; found {
			*c = chkRptCls(v)
			return nil
		}
	}
	return errors.Errorf("invalid class %s", value)
}

// addCheckerReportCmd is the "add-checker-report" fault-injection
// subcommand. The report to inject comes either from a JSON file (--file)
// or from a canned report selected by class (--class).
type addCheckerReportCmd struct {
cmdutil.LogCmd
cfgCmd
ctlInvokerCmd
jsonOutputCmd

File string `short:"f" long:"file" description:"File containing checker report in JSON format"`
Class chkRptCls `short:"c" long:"class" description:"Checker report class (canned reports)"`
}

// Execute runs the add-checker-report subcommand. The report is either
// loaded from the JSON file named by --file or, when no file is given, built
// from a canned template selected by --class. It is then submitted through
// the management service FaultInjectReport RPC. Positional arguments are
// ignored. Every error returned is wrapped with the "add checker finding"
// prefix.
func (cmd *addCheckerReportCmd) Execute(_ []string) (errOut error) {
	// The deferred wrap relies on errors.Wrap passing a nil error through
	// as nil, so a successful run still returns nil.
	defer func() {
		errOut = errors.Wrap(errOut, "add checker finding")
	}()

	var rpt *chkpb.CheckReport
	if cmd.File == "" {
		rand.Seed(time.Now().UnixNano())

		// Canned reports, keyed by class, are meant for prototyping and
		// testing. When more control is needed, describe the report in
		// JSON and load it with the --file option instead.
		class := chkpb.CheckInconsistClass(cmd.Class)
		switch class {
		case chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL:
			rpt = &chkpb.CheckReport{
				Seq:      rand.Uint64(),
				Class:    class,
				Action:   chkpb.CheckInconsistAction_CIA_INTERACT,
				PoolUuid: uuid.New().String(),
				ActChoices: []chkpb.CheckInconsistAction{
					chkpb.CheckInconsistAction_CIA_TRUST_MS,
					chkpb.CheckInconsistAction_CIA_TRUST_PS,
					chkpb.CheckInconsistAction_CIA_IGNORE,
				},
				ActDetails: []string{"ms-label", "ps-label"},
			}
		default:
			return errors.Errorf("no canned report for class: %s", class)
		}

		// Only canned reports are annotated (for nicer messages); a
		// report read from a file is submitted exactly as written.
		finding := checker.AnnotateFinding(checker.NewFinding(rpt))
		rpt = &finding.CheckReport
	} else {
		data, readErr := ioutil.ReadFile(cmd.File)
		if readErr != nil {
			return errors.Wrapf(readErr, "failed to open file %s", cmd.File)
		}

		rpt = &chkpb.CheckReport{}
		if parseErr := json.Unmarshal(data, rpt); parseErr != nil {
			return errors.Wrapf(parseErr, "failed to parse file %s", cmd.File)
		}
	}

	// Applies to both sources: a report without a class is rejected.
	if rpt.Class == chkpb.CheckInconsistClass_CIC_NONE {
		return errors.New("class must be set")
	}

	inject := func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) {
		cmd.Debugf("injecting checker report: %+v", rpt)
		return mgmtpb.NewMgmtSvcClient(conn).FaultInjectReport(ctx, rpt)
	}
	resp, err := control.InvokeFaultRPC(context.Background(), cmd.ctlInvoker, inject)

	// In JSON mode both the response and any RPC error are handed to the
	// JSON writer rather than being reported directly.
	if cmd.jsonOutputEnabled() {
		return cmd.outputJSON(resp, err)
	}

	if err == nil {
		cmd.Info("Checker report added")
	}
	return err
}
11 changes: 11 additions & 0 deletions src/control/cmd/dmg/fi_disabled.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//
// (C) Copyright 2019-2022 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
//go:build !fault_injection
// +build !fault_injection

package main

// faultsCmdRoot is an empty placeholder used when the fault_injection build
// tag is not set, so the type still exists but contributes no subcommands.
type faultsCmdRoot struct{}
35 changes: 18 additions & 17 deletions src/control/cmd/dmg/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,23 +148,24 @@ func (c *cfgCmd) setConfig(cfg *control.Config) {
}

// cliOptions declares the global dmg command-line flags and the top-level
// subcommands. The embedded faultsCmdRoot and firmwareOption contribute
// additional entries only in builds that enable them.
// NOTE(review): the field list below appears twice because this text is a
// rendered diff hunk (previous alignment followed by the re-aligned version
// that adds faultsCmdRoot) — confirm against the real main.go before use.
type cliOptions struct {
AllowProxy bool `long:"allow-proxy" description:"Allow proxy configuration via environment"`
HostList string `short:"l" long:"host-list" description:"A comma separated list of addresses <ipv4addr/hostname> to connect to"`
Insecure bool `short:"i" long:"insecure" description:"Have dmg attempt to connect without certificates"`
Debug bool `short:"d" long:"debug" description:"Enable debug output"`
JSON bool `short:"j" long:"json" description:"Enable JSON output"`
JSONLogs bool `short:"J" long:"json-logging" description:"Enable JSON-formatted log output"`
ConfigPath string `short:"o" long:"config-path" description:"Client config file path"`
Server serverCmd `command:"server" alias:"srv" description:"Perform tasks related to remote servers"`
Storage storageCmd `command:"storage" alias:"sto" description:"Perform tasks related to storage attached to remote servers"`
Config configCmd `command:"config" alias:"cfg" description:"Perform tasks related to configuration of hardware on remote servers"`
System SystemCmd `command:"system" alias:"sys" description:"Perform distributed tasks related to DAOS system"`
Network NetCmd `command:"network" alias:"net" description:"Perform tasks related to network devices attached to remote servers"`
Pool PoolCmd `command:"pool" description:"Perform tasks related to DAOS pools"`
Cont ContCmd `command:"container" alias:"cont" description:"Perform tasks related to DAOS containers"`
Version versionCmd `command:"version" description:"Print dmg version"`
Telemetry telemCmd `command:"telemetry" alias:"telem" description:"Perform telemetry operations"`
Check checkCmdRoot `command:"check" description:"Check system health"`
AllowProxy bool `long:"allow-proxy" description:"Allow proxy configuration via environment"`
HostList string `short:"l" long:"host-list" description:"A comma separated list of addresses <ipv4addr/hostname> to connect to"`
Insecure bool `short:"i" long:"insecure" description:"Have dmg attempt to connect without certificates"`
Debug bool `short:"d" long:"debug" description:"Enable debug output"`
JSON bool `short:"j" long:"json" description:"Enable JSON output"`
JSONLogs bool `short:"J" long:"json-logging" description:"Enable JSON-formatted log output"`
ConfigPath string `short:"o" long:"config-path" description:"Client config file path"`
Server serverCmd `command:"server" alias:"srv" description:"Perform tasks related to remote servers"`
Storage storageCmd `command:"storage" alias:"sto" description:"Perform tasks related to storage attached to remote servers"`
Config configCmd `command:"config" alias:"cfg" description:"Perform tasks related to configuration of hardware on remote servers"`
System SystemCmd `command:"system" alias:"sys" description:"Perform distributed tasks related to DAOS system"`
Network NetCmd `command:"network" alias:"net" description:"Perform tasks related to network devices attached to remote servers"`
Pool PoolCmd `command:"pool" description:"Perform tasks related to DAOS pools"`
Cont ContCmd `command:"container" alias:"cont" description:"Perform tasks related to DAOS containers"`
Version versionCmd `command:"version" description:"Print dmg version"`
Telemetry telemCmd `command:"telemetry" alias:"telem" description:"Perform telemetry operations"`
Check checkCmdRoot `command:"check" description:"Check system health"`
faultsCmdRoot
firmwareOption // build with tag "firmware" to enable
ManPage cmdutil.ManCmd `command:"manpage" hidden:"true"`
}
Expand Down
Loading