From f2ae965f839d1e9cac82a3c3e8bc1af9d2b73c4d Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Mon, 30 Oct 2023 18:05:16 +0000 Subject: [PATCH] DAOS-14181 control: Remove bdev scan cache Required-githooks: true Signed-off-by: Tom Nabarro --- src/control/drpc/modules.go | 2 + src/control/server/config/faults.go | 4 +- src/control/server/config/server.go | 6 +- src/control/server/config/server_test.go | 29 +- src/control/server/ctl_storage.go | 93 - src/control/server/ctl_storage_rpc.go | 175 +- src/control/server/ctl_storage_rpc_test.go | 1940 +++++-------------- src/control/server/ctl_svc_test.go | 60 +- src/control/server/harness.go | 2 - src/control/server/instance_drpc.go | 108 +- src/control/server/instance_storage.go | 36 +- src/control/server/instance_storage_test.go | 137 +- src/control/server/server.go | 13 - src/control/server/server_utils.go | 71 - src/control/server/server_utils_test.go | 177 -- src/control/server/storage/bdev.go | 5 +- src/control/server/storage/provider.go | 109 -- src/control/server/storage/provider_test.go | 598 +++--- src/control/server/util_test.go | 10 +- src/include/daos/drpc_modules.h | 73 +- 20 files changed, 1193 insertions(+), 2455 deletions(-) diff --git a/src/control/drpc/modules.go b/src/control/drpc/modules.go index 1a51bc2f67c3..e816dd654120 100644 --- a/src/control/drpc/modules.go +++ b/src/control/drpc/modules.go @@ -244,6 +244,8 @@ const ( MethodPoolUpgrade MgmtMethod = C.DRPC_METHOD_MGMT_POOL_UPGRADE // MethodLedManage defines a method to manage a VMD device LED state MethodLedManage MgmtMethod = C.DRPC_METHOD_MGMT_LED_MANAGE + // MethodNvmeDevs is a ModuleMgmt method + MethodNvmeDevs MgmtMethod = C.DRPC_METHOD_MGMT_NVME_LIST_DEVS ) type srvMethod int32 diff --git a/src/control/server/config/faults.go b/src/control/server/config/faults.go index 16e851ee1fe5..67148dbd0102 100644 --- a/src/control/server/config/faults.go +++ b/src/control/server/config/faults.go @@ -147,7 +147,7 @@ func FaultConfigDuplicateScmDeviceList(curIdx, seenIdx int) *fault.Fault { func FaultConfigScmDiffClass(curIdx, seenIdx int) *fault.Fault { return serverConfigFault( code.ServerConfigScmDiffClass, - fmt.Sprintf("the SCM class in I/O Engine %d is different from I/O Engine %d", + fmt.Sprintf("the SCM class in engine %d is different from engine %d", curIdx, seenIdx), "ensure that each I/O Engine has a single SCM tier with the same class and restart", ) @@ -156,7 +156,7 @@ func FaultConfigScmDiffClass(curIdx, seenIdx int) *fault.Fault { func FaultConfigOverlappingBdevDeviceList(curIdx, seenIdx int) *fault.Fault { return serverConfigFault( code.ServerConfigOverlappingBdevDeviceList, - fmt.Sprintf("the bdev_list value in I/O Engine %d overlaps with entries in server %d", curIdx, seenIdx), + fmt.Sprintf("the bdev_list value in engine %d overlaps with entries in engine %d", curIdx, seenIdx), "ensure that each I/O Engine has a unique set of bdev_list entries and restart", ) } diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index 496f343bf2eb..4b6eeb911ab8 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -739,7 +739,7 @@ func (cfg *Server) validateMultiEngineConfig(log logging.Logger) error { seenValues := make(map[string]int) seenScmSet := make(map[string]int) seenBdevSet := make(map[string]int) - seenIdx := 0 + seenIdx := -1 seenBdevCount := -1 seenTargetCount := -1 seenHelperStreamCount := -1 @@ -806,8 +806,8 @@ func (cfg *Server) validateMultiEngineConfig(log logging.Logger) error { // Log error but don't fail in order to be lenient with unbalanced device // counts in particular cases e.g. using different capacity SSDs or VMDs // with different number of backing devices. - err := FaultConfigBdevCountMismatch(idx, bdevCount, seenIdx, seenBdevCount) - log.Noticef(err.Error()) + e := FaultConfigBdevCountMismatch(idx, bdevCount, seenIdx, seenBdevCount) + log.Noticef(e.Error()) } if seenTargetCount != -1 && engine.TargetCount != seenTargetCount { return FaultConfigTargetCountMismatch(idx, engine.TargetCount, seenIdx, diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index e229831558c8..6be17cf4ae6b 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -1617,7 +1617,7 @@ func TestServerConfig_validateMultiEngineConfig(t *testing.T) { WithStorageClass("ram"). WithScmMountPoint("b"), ). - WithPinnedNumaNode(0). + WithPinnedNumaNode(1). WithTargetCount(8) } @@ -1625,6 +1625,7 @@ func TestServerConfig_validateMultiEngineConfig(t *testing.T) { configA *engine.Config configB *engine.Config expErr error + expLog string }{ "successful validation": { configA: configA(), @@ -1690,15 +1691,15 @@ func TestServerConfig_validateMultiEngineConfig(t *testing.T) { AppendStorage( storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(MockPCIAddr(1), MockPCIAddr(1)), + WithBdevDeviceList(MockPCIAddr(1), MockPCIAddr(2)), ), configB: configB(). AppendStorage( storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(MockPCIAddr(2), MockPCIAddr(2)), + WithBdevDeviceList(MockPCIAddr(2), MockPCIAddr(1)), ), - expErr: errors.New("valid PCI addresses"), + expErr: errors.New("engine 1 overlaps with entries in engine 0"), }, "mismatched scm_class": { configA: configA(), @@ -1711,6 +1712,21 @@ func TestServerConfig_validateMultiEngineConfig(t *testing.T) { ), expErr: FaultConfigScmDiffClass(1, 0), }, + "mismatched nr bdev_list": { + configA: configA(). + AppendStorage( + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(MockPCIAddr(1)), + ), + configB: configB(). + AppendStorage( + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(MockPCIAddr(2), MockPCIAddr(3)), + ), + expLog: "engine 1 has 2 but engine 0 has 1", + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) @@ -1722,6 +1738,11 @@ func TestServerConfig_validateMultiEngineConfig(t *testing.T) { gotErr := conf.Validate(log) CmpErr(t, tc.expErr, gotErr) + + if tc.expLog != "" { + hasEntry := strings.Contains(buf.String(), tc.expLog) + AssertTrue(t, hasEntry, "expected entries not found in log") + } }) } } diff --git a/src/control/server/ctl_storage.go b/src/control/server/ctl_storage.go index f4747f875130..fa6dd85b9721 100644 --- a/src/control/server/ctl_storage.go +++ b/src/control/server/ctl_storage.go @@ -7,8 +7,6 @@ package server import ( - "context" - "fmt" "path/filepath" "strings" @@ -16,7 +14,6 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/common" - "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/server/engine" "github.com/daos-stack/daos/src/control/server/storage" @@ -156,93 +153,3 @@ func (cs *ControlService) getScmUsage(ssr *storage.ScmScanResponse) (*storage.Sc return &storage.ScmScanResponse{Namespaces: nss}, nil } - -// scanAssignedBdevs retrieves up-to-date NVMe controller info including -// health statistics and stored server meta-data. If I/O Engines are running -// then query is issued over dRPC as go-spdk bindings cannot be used to access -// controller claimed by another process. Only update info for controllers -// assigned to I/O Engines. -func (cs *ControlService) scanAssignedBdevs(ctx context.Context, nsps []*ctl.ScmNamespace, statsReq bool) (*storage.BdevScanResponse, error) { - instances := cs.harness.Instances() - ctrlrs := new(storage.NvmeControllers) - - for _, ei := range instances { - if !ei.GetStorage().HasBlockDevices() { - continue - } - - tsrs, err := ei.ScanBdevTiers() - if err != nil { - return nil, err - } - - // Build slice of controllers in all tiers. - tierCtrlrs := make([]storage.NvmeController, 0) - msg := fmt.Sprintf("NVMe tiers for engine-%d:", ei.Index()) - for _, tsr := range tsrs { - msg += fmt.Sprintf("\n\tTier-%d: %s", tsr.Tier, tsr.Result.Controllers) - for _, c := range tsr.Result.Controllers { - tierCtrlrs = append(tierCtrlrs, *c) - } - } - cs.log.Info(msg) - - // If the engine is not running or we aren't interested in temporal - // statistics for the bdev devices then continue to next engine. - if !ei.IsReady() || !statsReq { - ctrlrs.Update(tierCtrlrs...) - continue - } - - cs.log.Debugf("updating stats for %d bdev(s) on instance %d", len(tierCtrlrs), - ei.Index()) - - // DAOS-12750 Compute the maximal size of the metadata to allow the engine to fill - // the WallMeta field response. The maximal metadata (i.e. VOS index file) size - // should be equal to the SCM available size divided by the number of targets of the - // engine. - var md_size uint64 - var rdb_size uint64 - for _, nsp := range nsps { - mp := nsp.GetMount() - if mp == nil { - continue - } - if r, err := ei.GetRank(); err != nil || uint32(r) != mp.GetRank() { - continue - } - - // NOTE DAOS-14223: This metadata size calculation won't necessarily match - // the meta blob size on SSD if --meta-size is specified in - // pool create command. - md_size = mp.GetUsableBytes() / uint64(ei.GetTargetCount()) - - engineCfg, err := cs.getEngineCfgFromScmNsp(nsp) - if err != nil { - return nil, errors.Wrap(err, "Engine with invalid configuration") - } - rdb_size, err = cs.getRdbSize(engineCfg) - if err != nil { - return nil, err - } - break - } - - if md_size == 0 { - cs.log.Noticef("instance %d: no SCM space available for metadata", ei.Index) - } - - // If engine is running and has claimed the assigned devices for - // each tier, iterate over scan results for each tier and send query - // over drpc to update controller details with current health stats - // and smd info. - updatedCtrlrs, err := ei.updateInUseBdevs(ctx, tierCtrlrs, md_size, rdb_size) - if err != nil { - return nil, errors.Wrapf(err, "instance %d: update online bdevs", ei.Index()) - } - - ctrlrs.Update(updatedCtrlrs...) - } - - return &storage.BdevScanResponse{Controllers: *ctrlrs}, nil -} diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index c448e98aaab3..f29c7d857826 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -48,6 +48,15 @@ const ( mdFsScmBytes uint64 = humanize.MiByte ) +// Package-local function variables for mocking in unit tests. +var ( + // Use to stub bdev scan response in StorageScan() unit tests. + scanBdevs = bdevScan + scanEngineBdevs = bdevScanEngine +) + +type scanBdevsFn func(storage.BdevScanRequest) (*storage.BdevScanResponse, error) + // newResponseState creates, populates and returns ResponseState. func newResponseState(inErr error, badStatus ctlpb.ResponseStatus, infoMsg string) *ctlpb.ResponseState { rs := new(ctlpb.ResponseState) @@ -61,73 +70,103 @@ func newResponseState(inErr error, badStatus ctlpb.ResponseStatus, infoMsg strin return rs } -// stripNvmeDetails removes all controller details leaving only PCI address and -// NUMA node/socket ID. Useful when scanning only device topology. -func stripNvmeDetails(pbc *ctlpb.NvmeController) { - pbc.Serial = "" - pbc.Model = "" - pbc.FwRev = "" -} +// TODO: Trim unwanted fields so responses can be coalesced from hash map when returned via +// control API. This should now occur in bdev backend and engine drpc handler. +// for _, pbc := range inResp.Ctrlrs { +// if !req.GetHealth() { +// pbc.HealthStats = nil +// } +// if !req.GetMeta() { +// pbc.SmdDevices = nil +// } +// if req.GetBasic() { +// pbc.Serial = "" +// pbc.Model = "" +// pbc.FwRev = "" +// } +// } + +// Convert bdev scan results to protobuf response. +func bdevScanToProtoResp(scan scanBdevsFn, req storage.BdevScanRequest) (*ctlpb.ScanNvmeResp, error) { + resp, err := scan(req) + if err != nil { + return nil, err + } -// newScanBdevResp populates protobuf NVMe scan response with controller info -// including health statistics or metadata if requested. -func newScanNvmeResp(req *ctlpb.ScanNvmeReq, inResp *storage.BdevScanResponse, inErr error) (*ctlpb.ScanNvmeResp, error) { - outResp := new(ctlpb.ScanNvmeResp) - outResp.State = new(ctlpb.ResponseState) + pbCtrlrs := make(proto.NvmeControllers, 0, len(resp.Controllers)) - if inErr != nil { - outResp.State = newResponseState(inErr, ctlpb.ResponseStatus_CTL_ERR_NVME, "") - return outResp, nil - } + return &ctlpb.ScanNvmeResp{ + State: new(ctlpb.ResponseState), + Ctrlrs: pbCtrlrs, + }, pbCtrlrs.FromNative(resp.Controllers) +} - pbCtrlrs := make(proto.NvmeControllers, 0, len(inResp.Controllers)) - if err := pbCtrlrs.FromNative(inResp.Controllers); err != nil { - return nil, err - } +// Scan bdevs through harness's ControlService (not per-engine). +func bdevScanGlobal(cs *ControlService, cfgBdevs *storage.BdevDeviceList) (*ctlpb.ScanNvmeResp, error) { + req := storage.BdevScanRequest{DeviceList: cfgBdevs} + return bdevScanToProtoResp(cs.storage.ScanBdevs, req) +} - // trim unwanted fields so responses can be coalesced from hash map - for _, pbc := range pbCtrlrs { - if !req.GetHealth() { - pbc.HealthStats = nil - } - if !req.GetMeta() { - pbc.SmdDevices = nil - } - if req.GetBasic() { - stripNvmeDetails(pbc) +// Scan bdevs through each engine and collate response results. +func bdevScanEngines(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq) (*ctlpb.ScanNvmeResp, error) { + var errLast error + instances := cs.harness.Instances() + resp := &ctlpb.ScanNvmeResp{} + + for _, ei := range instances { + respEng, err := scanEngineBdevs(ctx, ei, req) + if err != nil { + err = errors.Wrapf(err, "instance %d", ei.Index()) + if errLast == nil && len(instances) > 1 { + errLast = err // Save err to preserve partial results. + cs.log.Error(err.Error()) + continue + } + return nil, err // No partial results to save so fail. } + resp.Ctrlrs = append(resp.Ctrlrs, respEng.Ctrlrs...) } - outResp.Ctrlrs = pbCtrlrs + // If one engine succeeds and one other fails, error is embedded in the response. + resp.State = newResponseState(errLast, ctlpb.ResponseStatus_CTL_ERR_NVME, "") - return outResp, nil + return resp, nil + //resp, err := c.scanAssignedBdevs(ctx, nsps, req.GetHealth() || req.GetMeta()) } -// scanBdevs updates transient details if health statistics or server metadata -// is requested otherwise just retrieves cached static controller details. -func (c *ControlService) scanBdevs(ctx context.Context, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace) (*ctlpb.ScanNvmeResp, error) { +// Return NVMe device details. The scan method employed depends on whether the engines are running +// or not. If running, scan over dRPC. If not running then use engine's storage provider. +func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, nsps []*ctlpb.ScmNamespace) (*ctlpb.ScanNvmeResp, error) { if req == nil { - return nil, errors.New("nil bdev request") + return nil, errors.New("nil request") } - var bdevsInCfg bool - for _, ei := range c.harness.Instances() { - if ei.GetStorage().HasBlockDevices() { - bdevsInCfg = true + cfgBdevs := getBdevCfgsFromSrvCfg(cs.srvCfg).Bdevs() + hasBdevs := cfgBdevs.Len() != 0 + + // Note the potential window where engines are started but not yet ready to respond. In this + // state there is a possibility that neither scan mechanism will work because devices have + // been claimed by SPDK but details are not yet available over dRPC. Here it is assumed that + // devices are claimed as soon the engines are started. + + hasStarted := false + for _, ei := range cs.harness.Instances() { + if ei.IsStarted() { + hasStarted = true } } - if !bdevsInCfg { - c.log.Debugf("no bdevs in cfg so scan all") - // return details of all bdevs if none are assigned to engines - resp, err := c.storage.ScanBdevs(storage.BdevScanRequest{}) - return newScanNvmeResp(req, resp, err) + if !hasBdevs || !hasStarted { + if hasBdevs { + cs.log.Debugf("scan bdevs from control service as no engines started") + } else { + cs.log.Debugf("scan bdevs from control service as no bdevs in cfg") + } + return bdevScanGlobal(cs, cfgBdevs) } - c.log.Debugf("bdevs in cfg so scan only assigned") - resp, err := c.scanAssignedBdevs(ctx, nsps, req.GetHealth() || req.GetMeta()) - - return newScanNvmeResp(req, resp, err) + cs.log.Debugf("scan assigned bdevs through engine instances as some are started") + return bdevScanEngines(ctx, cs, req) } // newScanScmResp sets protobuf SCM scan response with module or namespace info. @@ -174,8 +213,6 @@ func (c *ControlService) scanScm(ctx context.Context, req *ctlpb.ScanScmReq) (*c // Returns the engine configuration managing the given NVMe controller func (c *ControlService) getEngineCfgFromNvmeCtl(nc *ctl.NvmeController) (*engine.Config, error) { - var engineCfg *engine.Config - pciAddr, err := hardware.NewPCIAddress(nc.GetPciAddr()) if err != nil { return nil, errors.Errorf("Invalid PCI address: %s", err) @@ -188,58 +225,33 @@ func (c *ControlService) getEngineCfgFromNvmeCtl(nc *ctl.NvmeController) (*engin ctlrAddr := pciAddr.String() for index := range c.srvCfg.Engines { - if engineCfg != nil { - break - } - for _, tierCfg := range c.srvCfg.Engines[index].Storage.Tiers { - if engineCfg != nil { - break - } - if !tierCfg.IsBdev() { continue } - for _, devName := range tierCfg.Bdev.DeviceList.Devices() { if devName == ctlrAddr { - engineCfg = c.srvCfg.Engines[index] - break + return c.srvCfg.Engines[index], nil } - } } } - if engineCfg == nil { - return nil, errors.Errorf("unknown PCI device %q", pciAddr) - } - - return engineCfg, nil + return nil, errors.Errorf("unknown PCI device %q", pciAddr) } // Returns the engine configuration managing the given SCM name-space func (c *ControlService) getEngineCfgFromScmNsp(nsp *ctl.ScmNamespace) (*engine.Config, error) { - var engineCfg *engine.Config mountPoint := nsp.GetMount().Path for index := range c.srvCfg.Engines { - if engineCfg != nil { - break - } - for _, tierCfg := range c.srvCfg.Engines[index].Storage.Tiers { if tierCfg.IsSCM() && tierCfg.Scm.MountPoint == mountPoint { - engineCfg = c.srvCfg.Engines[index] - break + return c.srvCfg.Engines[index], nil } } } - if engineCfg == nil { - return nil, errors.Errorf("unknown SCM mount point %s", mountPoint) - } - - return engineCfg, nil + return nil, errors.Errorf("unknown SCM mount point %s", mountPoint) } // return the size of the RDB file used for managing SCM metadata @@ -541,10 +553,11 @@ func (c *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageScan } resp.Scm = respScm - respNvme, err := c.scanBdevs(ctx, req.Nvme, respScm.Namespaces) + respNvme, err := scanBdevs(ctx, c, req.Nvme, respScm.Namespaces) if err != nil { return nil, err } + // TODO: Move into updateScanNvmeResp(). if req.Nvme.GetMeta() { c.adjustNvmeSize(respNvme) } diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 252d9dfedd56..bf9cdd4e1c93 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -9,7 +9,6 @@ package server import ( "context" "fmt" - "math" "os" "os/user" "path/filepath" @@ -24,14 +23,11 @@ import ( "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/common/proto" - "github.com/daos-stack/daos/src/control/common/proto/convert" "github.com/daos-stack/daos/src/control/common/proto/ctl" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/common/test" - "github.com/daos-stack/daos/src/control/drpc" "github.com/daos-stack/daos/src/control/events" "github.com/daos-stack/daos/src/control/lib/daos" - "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/provider/system" "github.com/daos-stack/daos/src/control/server/config" @@ -47,1597 +43,681 @@ const defaultRdbSize uint64 = uint64(daos.DefaultDaosMdCapSize) var ( defStorageScanCmpOpts = append(test.DefaultCmpOpts(), protocmp.IgnoreFields(&ctlpb.NvmeController{}, "serial")) -) - -func TestServer_CtlSvc_StorageScan_PreEngineStart(t *testing.T) { - ctrlr := storage.MockNvmeController() - ctrlr.SmdDevices = nil - ctrlrPB := proto.MockNvmeController() - ctrlrPB.HealthStats = nil - ctrlrPB.SmdDevices = nil - ctrlrPB2 := proto.MockNvmeController(2) - ctrlrPB2.HealthStats = nil - ctrlrPB2.SmdDevices = nil - ctrlrPBwHealth := proto.MockNvmeController() - ctrlrPBwHealth.SmdDevices = nil - ctrlrPBBasic := proto.MockNvmeController() - ctrlrPBBasic.HealthStats = nil - ctrlrPBBasic.SmdDevices = nil - ctrlrPBBasic.FwRev = "" - ctrlrPBBasic.Model = "" - - for name, tc := range map[string]struct { - multiEngine bool - req *ctlpb.StorageScanReq - bmbc *bdev.MockBackendConfig - smbc *scm.MockBackendConfig - tierCfgs storage.TierConfigs - expResp *ctlpb.StorageScanResp - expErr error - }{ - "successful scan; scm namespaces": { - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ - ctrlr, - storage.MockNvmeController(2), - }, - }, - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule()}, - GetNamespacesRes: storage.ScmNamespaces{storage.MockScmNamespace()}, - }, - tierCfgs: storage.TierConfigs{ - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(ctrlr.PciAddr, test.MockPCIAddr(2)), - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ - ctrlrPB, - ctrlrPB2, - }, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{proto.MockScmNamespace()}, - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "successful scan; no scm namespaces": { - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule()}, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPB}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - Modules: proto.ScmModules{proto.MockScmModule()}, - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "successful scan; no bdevs in config": { - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule()}, - }, - tierCfgs: storage.TierConfigs{}, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPB}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - Modules: proto.ScmModules{proto.MockScmModule()}, - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "successful scan; missing bdev in config": { - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule()}, - }, - tierCfgs: storage.TierConfigs{ - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2)), - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - Modules: proto.ScmModules{proto.MockScmModule()}, - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "successful scan; multiple bdev tiers in config": { - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ - ctrlr, - storage.MockNvmeController(2), - }, - }, - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule()}, - }, - tierCfgs: storage.TierConfigs{ - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(1)), - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2)), - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ - ctrlrPB, - ctrlrPB2, - }, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - Modules: proto.ScmModules{proto.MockScmModule()}, - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "spdk scan failure": { - bmbc: &bdev.MockBackendConfig{ - ScanErr: errors.New("spdk scan failed"), - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule()}, - GetNamespacesRes: storage.ScmNamespaces{storage.MockScmNamespace()}, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - State: &ctlpb.ResponseState{ - Error: "spdk scan failed", - Status: ctlpb.ResponseStatus_CTL_ERR_NVME, - }, - }, - Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{proto.MockScmNamespace()}, - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "scm module discovery failure": { - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - smbc: &scm.MockBackendConfig{ - GetModulesErr: errors.New("scm discover failed"), - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPB}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - State: &ctlpb.ResponseState{ - Error: "scm discover failed", - Status: ctlpb.ResponseStatus_CTL_ERR_SCM, - }, - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "all discover fail": { - bmbc: &bdev.MockBackendConfig{ - ScanErr: errors.New("spdk scan failed"), - }, - smbc: &scm.MockBackendConfig{ - GetModulesErr: errors.New("scm discover failed"), - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - State: &ctlpb.ResponseState{ - Error: "spdk scan failed", - Status: ctlpb.ResponseStatus_CTL_ERR_NVME, - }, - }, - Scm: &ctlpb.ScanScmResp{ - State: &ctlpb.ResponseState{ - Error: "scm discover failed", - Status: ctlpb.ResponseStatus_CTL_ERR_SCM, - }, - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "scan bdev health; single engine down": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{}, - Nvme: &ctlpb.ScanNvmeReq{ - Health: true, - }, - }, - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPBwHealth}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "scan bdev health; multiple engines down": { - multiEngine: true, - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{}, - Nvme: &ctlpb.ScanNvmeReq{ - Health: true, - }, - }, - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - // response should not contain duplicates - Ctrlrs: proto.NvmeControllers{ctrlrPBwHealth}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "scan bdev meta; engines down": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{}, - Nvme: &ctlpb.ScanNvmeReq{ - Meta: true, - }, - }, - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPB}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "scan bdev; nvme basic set": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{}, - Nvme: &ctlpb.ScanNvmeReq{ - Basic: true, - }, - }, - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ctrlr}, - }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPBBasic}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "scan bdev; vmd enabled": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{}, - Nvme: &ctlpb.ScanNvmeReq{}, - }, - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ - &storage.NvmeController{PciAddr: "050505:01:00.0"}, - }, - }, - }, - tierCfgs: storage.TierConfigs{ - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList("0000:05:05.5"), - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ - &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, - }, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{ - State: new(ctlpb.ResponseState), - }, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "scan usage": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{ - Usage: true, - }, - Nvme: &ctlpb.ScanNvmeReq{ - Meta: true, - }, - }, - expErr: errEngineNotReady, - }, - } { - t.Run(name, func(t *testing.T) { - log, buf := logging.NewTestLogger(t.Name()) - defer test.ShowBufferOnFailure(t, buf) - - if tc.tierCfgs == nil { - tc.tierCfgs = storage.TierConfigs{ - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(1)), - } - } - - engineCfg := engine.MockConfig().WithStorage(tc.tierCfgs...) - engineCfgs := []*engine.Config{engineCfg} - if tc.multiEngine { - engineCfgs = append(engineCfgs, engineCfg) - } - sCfg := config.DefaultServer().WithEngines(engineCfgs...) - - // tests are for pre-engine-start scenario so pass notStarted: true - cs := mockControlService(t, log, sCfg, tc.bmbc, tc.smbc, nil, true) - - if tc.req == nil { - tc.req = &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: new(ctlpb.ScanNvmeReq), - } - } - - resp, err := cs.StorageScan(test.Context(t), tc.req) - test.CmpErr(t, tc.expErr, err) - if err != nil { - return - } - - if tc.req.Nvme.Health || tc.req.Nvme.Meta { - if len(cs.harness.instances) == 0 { - tc.expResp.Nvme.Ctrlrs = nil - } - } - - if diff := cmp.Diff(tc.expResp, resp, defStorageScanCmpOpts...); diff != "" { - t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) - } - }) - } -} - -func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { - const ( - clusterSize uint64 = 32 * humanize.MiByte - metaWalSize uint64 = 64 * humanize.MiByte - rdbSize uint64 = defaultRdbSize - rdbWalSize uint64 = 512 * humanize.MiByte - ) - - adjustScmSize := func(sizeBytes uint64, withMdDaosScm bool) uint64 { - mdBytes := rdbSize + mdFsScmBytes - if withMdDaosScm { - mdBytes += mdDaosScmBytes - } - - if sizeBytes < mdBytes { - return 0 - } - - return sizeBytes - mdBytes - } - - adjustNvmeSize := func(nvmeCtlr *ctlpb.NvmeController, mdBytes uint64, engineTargetCount int) *ctlpb.NvmeController { - getClusterCount := func(sizeBytes uint64) uint64 { - clusterCount := sizeBytes / clusterSize - if sizeBytes%clusterSize != 0 { - clusterCount += 1 - } - return clusterCount - } - - type deviceSizeStat struct { - clusterPerTarget uint64 - smdDevs []*ctlpb.SmdDevice - } - devicesToAdjust := make(map[uint32]*deviceSizeStat, 0) - for _, dev := range nvmeCtlr.GetSmdDevices() { - targetCount := uint64(len(dev.GetTgtIds())) - dev.MetaSize = adjustScmSize(mdBytes, false) / uint64(engineTargetCount) - dev.AvailBytes = (dev.GetAvailBytes() / clusterSize) * clusterSize - - usableClusterCount := dev.GetAvailBytes() / clusterSize - usableClusterCount -= getClusterCount(dev.MetaSize) * uint64(engineTargetCount) - usableClusterCount -= getClusterCount(metaWalSize) * uint64(engineTargetCount) - usableClusterCount -= getClusterCount(rdbSize) - usableClusterCount -= getClusterCount(rdbWalSize) - - rank := dev.GetRank() - if devicesToAdjust[rank] == nil { - devicesToAdjust[rank] = &deviceSizeStat{ - clusterPerTarget: math.MaxUint64, - } - } - devicesToAdjust[rank].smdDevs = append(devicesToAdjust[rank].smdDevs, dev) - clusterPerTarget := usableClusterCount / targetCount - if clusterPerTarget < devicesToAdjust[rank].clusterPerTarget { - devicesToAdjust[rank].clusterPerTarget = clusterPerTarget - } - } - - for _, item := range devicesToAdjust { - for _, dev := range item.smdDevs { - targetCount := uint64(len(dev.GetTgtIds())) - dev.UsableBytes = item.clusterPerTarget * targetCount * clusterSize - } - } - - return nvmeCtlr - } - - // output to be returned from mock bdev backend - newCtrlr := func(idx int32) *storage.NvmeController { - ctrlr := storage.MockNvmeController(idx) - ctrlr.Serial = test.MockUUID(idx) - ctrlr.SmdDevices = nil - - return ctrlr - } - newCtrlrMultiNs := func(idx int32, numNss int) *storage.NvmeController { - ctrlr := storage.MockNvmeController(idx) - ctrlr.Serial = test.MockUUID(idx) - ctrlr.SmdDevices = nil - ctrlr.Namespaces = make([]*storage.NvmeNamespace, numNss) - for i := 0; i < numNss; i++ { - ctrlr.Namespaces[i] = storage.MockNvmeNamespace(int32(i + 1)) - } - - return ctrlr - } - - // expected protobuf output to be returned svc.StorageScan when health - // updated over drpc. Override serial uuid with variable argument - newCtrlrHealth := func(idx int32, serialIdx ...int32) (*ctlpb.NvmeController, *ctlpb.BioHealthResp) { - ctrlr := proto.MockNvmeController(idx) - sIdx := idx - if len(serialIdx) > 0 { - sIdx = serialIdx[0] - } - ctrlr.Model = fmt.Sprintf("model-%d", sIdx) - ctrlr.Serial = test.MockUUID(sIdx) - ctrlr.HealthStats = proto.MockNvmeHealth(idx + 1) - ctrlr.HealthStats.ClusterSize = clusterSize - ctrlr.HealthStats.MetaWalSize = metaWalSize - ctrlr.HealthStats.RdbWalSize = rdbWalSize - ctrlr.SmdDevices = nil - - bioHealthResp := new(ctlpb.BioHealthResp) - if err := convert.Types(ctrlr.HealthStats, bioHealthResp); err != nil { - t.Fatal(err) - } - bioHealthResp.TotalBytes = uint64(idx) * uint64(humanize.TByte) - bioHealthResp.AvailBytes = uint64(idx) * uint64(humanize.TByte/2) - bioHealthResp.ClusterSize = clusterSize - bioHealthResp.MetaWalSize = metaWalSize - bioHealthResp.RdbWalSize = rdbWalSize - - return ctrlr, bioHealthResp - } - newCtrlrPBwHealth := func(idx int32, serialIdx ...int32) *ctlpb.NvmeController { - c, _ := newCtrlrHealth(idx, serialIdx...) - return c - } - newBioHealthResp := func(idx int32, serialIdx ...int32) *ctlpb.BioHealthResp { - _, b := newCtrlrHealth(idx, serialIdx...) - return b - } - - // expected protobuf output to be returned svc.StorageScan when smd - // updated over drpc - newCtrlrMeta := func(ctrlrIdx int32, smdIndexes ...int32) (*ctlpb.NvmeController, *ctlpb.SmdDevResp) { - ctrlr := proto.MockNvmeController(ctrlrIdx) - ctrlr.Serial = test.MockUUID(ctrlrIdx) - ctrlr.HealthStats = nil - - if len(smdIndexes) == 0 { - smdIndexes = append(smdIndexes, ctrlrIdx) - } - smdDevRespDevices := make([]*ctlpb.SmdDevice, len(smdIndexes)) - ctrlr.SmdDevices = make([]*ctlpb.SmdDevice, len(smdIndexes)) - ctrlr.Namespaces = make([]*ctlpb.NvmeController_Namespace, len(smdIndexes)) - for i, idx := range smdIndexes { - sd := proto.MockSmdDevice(ctrlr.PciAddr, idx+1) - sd.DevState = devStateNormal - sd.Rank = uint32(ctrlrIdx) - sd.TrAddr = ctrlr.PciAddr - ctrlr.SmdDevices[i] = sd - - smdPB := new(ctlpb.SmdDevice) - if err := convert.Types(sd, smdPB); err != nil { - t.Fatal(err) - } - smdDevRespDevices[i] = smdPB - - // expect resultant controller to have updated utilization values - ctrlr.SmdDevices[i].TotalBytes = uint64(idx) * uint64(humanize.TByte) - ctrlr.SmdDevices[i].AvailBytes = uint64(idx) * uint64(humanize.TByte/2) - ctrlr.SmdDevices[i].ClusterSize = clusterSize - ctrlr.SmdDevices[i].MetaWalSize = metaWalSize - ctrlr.SmdDevices[i].RdbSize = rdbSize - ctrlr.SmdDevices[i].RdbWalSize = rdbWalSize - ctrlr.Namespaces[i] = proto.MockNvmeNamespace(int32(i + 1)) - } - - return ctrlr, &ctlpb.SmdDevResp{Devices: smdDevRespDevices} - } - newCtrlrPB := func(idx int32) *ctlpb.NvmeController { - c, _ := newCtrlrMeta(idx) - c.SmdDevices = nil - return c - } - newCtrlrPBwBasic := func(idx int32) *ctlpb.NvmeController { - c := newCtrlrPB(idx) - c.FwRev = "" - c.Model = "" - return c - } - newCtrlrPBwMeta := func(idx int32, smdIndexes ...int32) *ctlpb.NvmeController { - c, _ := newCtrlrMeta(idx, smdIndexes...) - return c + defProviderScanRes = &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{ + storage.MockNvmeController(1), + }, } - newSmdDevResp := func(idx int32, smdIndexes ...int32) *ctlpb.SmdDevResp { - _, s := newCtrlrMeta(idx, smdIndexes...) - return s + defEngineScanRes = &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(2), + }, + State: new(ctlpb.ResponseState), } +) - smdDevRespStateNew := newSmdDevResp(1) - smdDevRespStateNew.Devices[0].DevState = devStateNew - smdDevRespStateNew.Devices[0].ClusterSize = 0 - smdDevRespStateNew.Devices[0].MetaWalSize = 0 - smdDevRespStateNew.Devices[0].RdbWalSize = 0 - - ctrlrPBwMetaNew := newCtrlrPBwMeta(1) - ctrlrPBwMetaNew.SmdDevices[0].AvailBytes = 0 - ctrlrPBwMetaNew.SmdDevices[0].TotalBytes = 0 - ctrlrPBwMetaNew.SmdDevices[0].DevState = devStateNew - ctrlrPBwMetaNew.SmdDevices[0].ClusterSize = 0 - ctrlrPBwMetaNew.SmdDevices[0].UsableBytes = 0 - ctrlrPBwMetaNew.SmdDevices[0].RdbSize = 0 - ctrlrPBwMetaNew.SmdDevices[0].RdbWalSize = 0 - ctrlrPBwMetaNew.SmdDevices[0].MetaSize = 0 - ctrlrPBwMetaNew.SmdDevices[0].MetaWalSize = 0 - - ctrlrPBwMetaNormal := newCtrlrPBwMeta(1) - ctrlrPBwMetaNormal.SmdDevices[0].AvailBytes = 0 - ctrlrPBwMetaNormal.SmdDevices[0].TotalBytes = 0 - ctrlrPBwMetaNormal.SmdDevices[0].DevState = devStateNormal - ctrlrPBwMetaNormal.SmdDevices[0].ClusterSize = 0 - ctrlrPBwMetaNormal.SmdDevices[0].UsableBytes = 0 - ctrlrPBwMetaNormal.SmdDevices[0].RdbSize = 0 - ctrlrPBwMetaNormal.SmdDevices[0].RdbWalSize = 0 - ctrlrPBwMetaNormal.SmdDevices[0].MetaSize = 0 - ctrlrPBwMetaNormal.SmdDevices[0].MetaWalSize = 0 - - mockPbScmMount0 := proto.MockScmMountPoint(0) - mockPbScmMount0.Rank += 1 - mockPbScmNamespace0 := proto.MockScmNamespace(0) - mockPbScmNamespace0.Mount = mockPbScmMount0 - mockPbScmMount1 := proto.MockScmMountPoint(1) - mockPbScmMount1.Rank += 1 - mockPbScmNamespace1 := proto.MockScmNamespace(1) - mockPbScmNamespace1.Mount = mockPbScmMount1 - +func TestServer_bdevScan(t *testing.T) { for name, tc := range map[string]struct { - req *ctlpb.StorageScanReq - csCtrlrs *storage.NvmeControllers // control service storage provider - eCtrlrs []*storage.NvmeControllers // engine storage provider - smbc *scm.MockBackendConfig - smsc *system.MockSysConfig - storageCfgs []storage.TierConfigs - engineTargetCount []int - enginesNotReady bool - scanTwice bool - junkResp bool - drpcResps map[int][]*mockDrpcResponse - expErr error - expResp *ctlpb.StorageScanResp + req *ctlpb.ScanNvmeReq + provRes *storage.BdevScanResponse + provErr error + engTierCfgs []storage.TierConfigs // one per-engine + engStopped []bool // one per-engine (all false if unset) + engRes []ctlpb.ScanNvmeResp // one per-engine + engErr []error // one per-engine + expResp *ctlpb.ScanNvmeResp + expErr error + expBackendScanCalls []storage.BdevScanRequest }{ - "engine up; scan bdev basic": { - req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Basic: true}, + "nil request": { + expErr: errors.New("nil request"), + }, + "no bdevs in config; scan local fails": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{{}}, + provErr: errors.New("fail"), + engStopped: []bool{false}, + expErr: errors.New("fail"), + }, + "no bdevs in config; scan local; devlist passed to backend": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{{}}, + engStopped: []bool{false}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(1), + }, + State: new(ctlpb.ResponseState), }, - storageCfgs: []storage.TierConfigs{ + expBackendScanCalls: []storage.BdevScanRequest{ + {DeviceList: new(storage.BdevDeviceList)}, + }, + }, + "bdevs in config; engine not started; scan local; devlist passed to backend": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), + WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), }, }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1)}, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: {}, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{newCtrlrPBwBasic(1)}, - State: new(ctlpb.ResponseState), + engStopped: []bool{true}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(1), }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "engine up; scan bdev basic; no bdevs in config": { - req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Basic: true}, + State: new(ctlpb.ResponseState), }, - storageCfgs: []storage.TierConfigs{}, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1)}, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{newCtrlrPBwBasic(1)}, - State: new(ctlpb.ResponseState), + expBackendScanCalls: []storage.BdevScanRequest{ + { + DeviceList: storage.MustNewBdevDeviceList( + test.MockPCIAddr(1), test.MockPCIAddr(2)), }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - MemInfo: proto.MockPBMemInfo(), }, }, - "engine up; scan bdev basic; missing bdev in config": { - req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Basic: true}, - }, - storageCfgs: []storage.TierConfigs{ + "bdevs in config; engine started; scan remote": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), + WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), }, }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(2)}, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: {}, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{}, - State: new(ctlpb.ResponseState), + engStopped: []bool{false}, + engErr: []error{nil}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(2), }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - MemInfo: proto.MockPBMemInfo(), + State: new(ctlpb.ResponseState), }, }, - "engine up; scan bdev health": { - req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Health: true}, - }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1)}, - storageCfgs: []storage.TierConfigs{ + "scan remote; collate results from multiple engines": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), - }, - }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1)}, - {Message: newBioHealthResp(1)}, - }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{newCtrlrPBwHealth(1)}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "engine up; scan bdev meta": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1)}, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{ - storage.MockScmModule(0), + WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), }, - GetNamespacesRes: storage.ScmNamespaces{ - storage.MockScmNamespace(0), - }, - }, - smsc: &system.MockSysConfig{ - GetfsUsageResps: []system.GetfsUsageRetval{ - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, - }, - }, - }, - storageCfgs: []storage.TierConfigs{ { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmDeviceList(mockPbScmNamespace0.Blockdev), storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), + WithBdevDeviceList(test.MockPCIAddr(3), + test.MockPCIAddr(4)), }, }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1)}, - {Message: newBioHealthResp(1)}, - }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ + engRes: []ctlpb.ScanNvmeResp{ + { Ctrlrs: proto.NvmeControllers{ - adjustNvmeSize(newCtrlrPBwMeta(1), mockPbScmMount0.AvailBytes, 4), + proto.MockNvmeController(1), + proto.MockNvmeController(2), }, State: new(ctlpb.ResponseState), }, - Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{ - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace0.Blockdev, - Dev: mockPbScmNamespace0.Dev, - Size: mockPbScmNamespace0.Size, - Uuid: mockPbScmNamespace0.Uuid, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount0.Class, - DeviceList: mockPbScmMount0.DeviceList, - Path: mockPbScmMount0.Path, - TotalBytes: mockPbScmMount0.TotalBytes, - AvailBytes: mockPbScmMount0.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount0.AvailBytes, false), - Rank: mockPbScmMount0.Rank, - }, - }, + { + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(3), + proto.MockNvmeController(4), }, State: new(ctlpb.ResponseState), }, - MemInfo: proto.MockPBMemInfo(), }, - }, - "engines up; scan bdev health": { - req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Health: true}, + engErr: []error{nil, nil}, + engStopped: []bool{false, false}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(1), + proto.MockNvmeController(2), + proto.MockNvmeController(3), + proto.MockNvmeController(4), + }, + State: new(ctlpb.ResponseState), }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1), newCtrlr(2)}, - eCtrlrs: []*storage.NvmeControllers{{newCtrlr(1)}, {newCtrlr(2)}}, - storageCfgs: []storage.TierConfigs{ + }, + "scan remote; both engine scans fail": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{ { storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), + WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), }, { storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(2).PciAddr), - }, - }, - engineTargetCount: []int{4, 4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1)}, - {Message: newBioHealthResp(1)}, - }, - 1: { - {Message: newSmdDevResp(2)}, - {Message: newBioHealthResp(2)}, - }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ - newCtrlrPBwHealth(1), - newCtrlrPBwHealth(2), - }, - State: new(ctlpb.ResponseState), + WithBdevDeviceList(test.MockPCIAddr(3), + test.MockPCIAddr(4)), }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - MemInfo: proto.MockPBMemInfo(), }, + engRes: []ctlpb.ScanNvmeResp{{}, {}}, + engErr: []error{errors.New("fail1"), errors.New("fail2")}, + engStopped: []bool{false, false}, + expErr: errors.New("fail2"), }, - // make sure stale information is cleared and not used from cache - "verify cache invalidation over multiple storage scan calls": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1), newCtrlr(2)}, - eCtrlrs: []*storage.NvmeControllers{{newCtrlr(1)}, {newCtrlr(2)}}, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{ - storage.MockScmModule(0), - }, - GetNamespacesRes: storage.ScmNamespaces{ - storage.MockScmNamespace(0), - storage.MockScmNamespace(1), - }, - }, - smsc: &system.MockSysConfig{ - GetfsUsageResps: []system.GetfsUsageRetval{ - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, - }, - { - Total: mockPbScmMount1.TotalBytes, - Avail: mockPbScmMount1.AvailBytes, - }, - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, - }, - { - Total: mockPbScmMount1.TotalBytes, - Avail: mockPbScmMount1.AvailBytes, - }, - }, - }, - storageCfgs: []storage.TierConfigs{ + "scan remote; partial results with one failed engine scan": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{ { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmDeviceList(mockPbScmNamespace0.Blockdev), storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), + WithBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), }, { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount1.Path). - WithScmDeviceList(mockPbScmNamespace1.Blockdev), storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(2).PciAddr), - }, - }, - scanTwice: true, - engineTargetCount: []int{4, 4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1, 1, 2, 3)}, - {Message: newBioHealthResp(1, 1)}, - {Message: newBioHealthResp(1, 2)}, - {Message: newBioHealthResp(1, 3)}, - {Message: newSmdDevResp(1)}, - {Message: newBioHealthResp(1)}, - }, - 1: { - {Message: newSmdDevResp(2, 1, 2, 3)}, - {Message: newBioHealthResp(1, 1)}, - {Message: newBioHealthResp(1, 2)}, - {Message: newBioHealthResp(1, 3)}, - {Message: newSmdDevResp(2)}, - {Message: newBioHealthResp(2)}, + WithBdevDeviceList(test.MockPCIAddr(3), + test.MockPCIAddr(4)), }, }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ + engRes: []ctlpb.ScanNvmeResp{ + {}, + { Ctrlrs: proto.NvmeControllers{ - adjustNvmeSize(newCtrlrPBwMeta(1), mockPbScmMount0.AvailBytes, 4), - adjustNvmeSize(newCtrlrPBwMeta(2), mockPbScmMount1.AvailBytes, 4), + proto.MockNvmeController(3), + proto.MockNvmeController(4), }, State: new(ctlpb.ResponseState), }, - // Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{ - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace0.Blockdev, - Dev: mockPbScmNamespace0.Dev, - Size: mockPbScmNamespace0.Size, - Uuid: mockPbScmNamespace0.Uuid, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount0.Class, - DeviceList: mockPbScmMount0.DeviceList, - Path: mockPbScmMount0.Path, - TotalBytes: mockPbScmMount0.TotalBytes, - AvailBytes: mockPbScmMount0.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount0.AvailBytes, false), - Rank: mockPbScmMount0.Rank, - }, - }, - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace1.Blockdev, - Dev: mockPbScmNamespace1.Dev, - Size: mockPbScmNamespace1.Size, - Uuid: mockPbScmNamespace1.Uuid, - NumaNode: mockPbScmNamespace1.NumaNode, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount1.Class, - DeviceList: mockPbScmMount1.DeviceList, - Path: mockPbScmMount1.Path, - TotalBytes: mockPbScmMount1.TotalBytes, - AvailBytes: mockPbScmMount1.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount1.AvailBytes, false), - Rank: mockPbScmMount1.Rank, - }, - }, - }, - State: new(ctlpb.ResponseState), + }, + engErr: []error{errors.New("fail"), nil}, + engStopped: []bool{false, false}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(3), + proto.MockNvmeController(4), + }, + State: &ctlpb.ResponseState{ + Error: "instance 0: fail", + Status: ctlpb.ResponseStatus_CTL_ERR_NVME, }, - MemInfo: proto.MockPBMemInfo(), }, }, - "engines up; scan bdev meta; multiple nvme namespaces": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - csCtrlrs: &storage.NvmeControllers{ - newCtrlrMultiNs(1, 2), newCtrlrMultiNs(2, 2), - }, - eCtrlrs: []*storage.NvmeControllers{ - {newCtrlrMultiNs(1, 2)}, {newCtrlrMultiNs(2, 2)}, - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{ - storage.MockScmModule(0), + + // TODO: Results filtering of controller details should be handled either in bdev + // storage provider backend or in engine dRPC handler, not here. + + //"scan remote; filter results based on request basic flag": { + // req: &ctlpb.ScanNvmeReq{Basic: true}, + // engTierCfgs: []storage.TierConfigs{ + // { + // storage.NewTierConfig(). + // WithStorageClass(storage.ClassNvme.String()). + // WithBdevDeviceList(test.MockPCIAddr(1), + // test.MockPCIAddr(2)), + // }, + // }, + // engRes: []ctlpb.ScanNvmeResp{ + // { + // Ctrlrs: proto.NvmeControllers{ + // proto.MockNvmeController(1), + // proto.MockNvmeController(2), + // }, + // State: new(ctlpb.ResponseState), + // }, + // }, + // engErr: []error{nil}, + // engStopped: []bool{false}, + // expResp: &ctlpb.ScanNvmeResp{ + // Ctrlrs: proto.NvmeControllers{ + // func() *ctlpb.NvmeController { + // nc := proto.MockNvmeController(1) + // nc.HealthStats = nil + // nc.SmdDevices = nil + // nc.FwRev = "" + // nc.Model = "" + // return nc + // }(), + // func() *ctlpb.NvmeController { + // nc := proto.MockNvmeController(2) + // nc.HealthStats = nil + // nc.SmdDevices = nil + // nc.FwRev = "" + // nc.Model = "" + // return nc + // }(), + // }, + // State: new(ctlpb.ResponseState), + // }, + //}, + //"scan local; filter results based on request basic flag": { + // req: &ctlpb.ScanNvmeReq{Basic: true}, + // engTierCfgs: []storage.TierConfigs{ + // { + // storage.NewTierConfig(). + // WithStorageClass(storage.ClassNvme.String()). + // WithBdevDeviceList(test.MockPCIAddr(1), + // test.MockPCIAddr(2)), + // }, + // }, + // provRes: &storage.BdevScanResponse{ + // Controllers: storage.NvmeControllers{ + // storage.MockNvmeController(1), + // storage.MockNvmeController(2), + // }, + // }, + // engStopped: []bool{true}, + // expResp: &ctlpb.ScanNvmeResp{ + // Ctrlrs: proto.NvmeControllers{ + // func() *ctlpb.NvmeController { + // nc := proto.MockNvmeController(1) + // nc.HealthStats = nil + // nc.SmdDevices = nil + // nc.FwRev = "" + // nc.Model = "" + // return nc + // }(), + // func() *ctlpb.NvmeController { + // nc := proto.MockNvmeController(2) + // nc.HealthStats = nil + // nc.SmdDevices = nil + // nc.FwRev = "" + // nc.Model = "" + // return nc + // }(), + // }, + // State: new(ctlpb.ResponseState), + // }, + // expBackendScanCalls: []storage.BdevScanRequest{ + // { + // DeviceList: storage.MustNewBdevDeviceList( + // test.MockPCIAddr(1), test.MockPCIAddr(2)), + // }, + // }, + //}, + "bdevs in config; engine not started; scan local; vmd enabled": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{ + { + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList("0000:05:05.5"), }, - GetNamespacesRes: storage.ScmNamespaces{ - storage.MockScmNamespace(0), - storage.MockScmNamespace(1), + }, + provRes: &storage.BdevScanResponse{ + Controllers: storage.NvmeControllers{ + &storage.NvmeController{PciAddr: "050505:01:00.0"}, }, }, - smsc: &system.MockSysConfig{ - GetfsUsageResps: []system.GetfsUsageRetval{ - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, - }, - { - Total: mockPbScmMount1.TotalBytes, - Avail: mockPbScmMount1.AvailBytes, - }, + engStopped: []bool{true}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, }, + State: new(ctlpb.ResponseState), }, - storageCfgs: []storage.TierConfigs{ + expBackendScanCalls: []storage.BdevScanRequest{ + {DeviceList: storage.MustNewBdevDeviceList("0000:05:05.5")}, + }, + }, + "bdevs in config; engine started; scan remote; vmd enabled": { + req: &ctlpb.ScanNvmeReq{Health: true, Meta: true}, + engTierCfgs: []storage.TierConfigs{ { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmDeviceList(mockPbScmNamespace0.Blockdev), storage.NewTierConfig(). WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), + WithBdevDeviceList("0000:05:05.5"), }, + }, + engRes: []ctlpb.ScanNvmeResp{ { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount1.Path). - WithScmDeviceList(mockPbScmNamespace1.Blockdev), - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(2).PciAddr), + Ctrlrs: proto.NvmeControllers{ + &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, + }, + State: new(ctlpb.ResponseState), }, }, - engineTargetCount: []int{8, 8}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1, 1, 2)}, - {Message: newBioHealthResp(1, 1)}, - {Message: newBioHealthResp(2, 1)}, + engErr: []error{nil}, + engStopped: []bool{false}, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, }, - 1: { - {Message: newSmdDevResp(2, 3, 4)}, - {Message: newBioHealthResp(3, 2)}, - {Message: newBioHealthResp(4, 2)}, + State: new(ctlpb.ResponseState), + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if tc.provRes == nil { + tc.provRes = defProviderScanRes + } + if tc.engRes == nil { + tc.engRes = []ctlpb.ScanNvmeResp{*defEngineScanRes} + } + + if len(tc.engStopped) != len(tc.engTierCfgs) { + t.Fatal("len tc.engStopped != len tc.tierCfgs") + } + + idx := 0 + // Mock per-engine-scan function to focus on unit testing bdevScan(). + scanEngineBdevs = func(_ context.Context, _ Engine, _ *ctlpb.ScanNvmeReq) (*ctlpb.ScanNvmeResp, error) { + if len(tc.engRes) <= idx { + t.Fatal("engine scan called but response not specified") + } + if len(tc.engErr) <= idx { + t.Fatal("engine scan called but error not specified") + } + engRes := tc.engRes[idx] + engErr := tc.engErr[idx] + idx++ + return &engRes, engErr + } + defer func() { + scanEngineBdevs = bdevScanEngine + }() + + engCfgs := []*engine.Config{} + for _, tcs := range tc.engTierCfgs { + engCfg := engine.MockConfig().WithStorage(tcs...) + engCfgs = append(engCfgs, engCfg) + } + sCfg := config.DefaultServer().WithEngines(engCfgs...) + + bmbc := &bdev.MockBackendConfig{ + ScanRes: tc.provRes, + ScanErr: tc.provErr, + } + bmb := bdev.NewMockBackend(bmbc) + smb := scm.NewMockBackend(nil) + + cs := newMockControlServiceFromBackends(t, log, sCfg, bmb, smb, nil, + tc.engStopped...) + + resp, err := bdevScan(test.Context(t), cs, tc.req, nil) + test.CmpErr(t, tc.expErr, err) + if err != nil { + return + } + + if diff := cmp.Diff(tc.expResp, resp, + defStorageScanCmpOpts...); diff != "" { + t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) + } + + cmpopt := cmp.Comparer(func(x, y *storage.BdevDeviceList) bool { + if x == nil && y == nil { + return true + } + return x.Equals(y) + }) + + bmb.RLock() + if len(tc.expBackendScanCalls) != len(bmb.ScanCalls) { + t.Fatalf("unexpected number of backend scan calls, want %d got %d", + len(tc.expBackendScanCalls), len(bmb.ScanCalls)) + } + if len(tc.expBackendScanCalls) == 0 { + return + } + if diff := cmp.Diff(tc.expBackendScanCalls, bmb.ScanCalls, + append(defStorageScanCmpOpts, cmpopt)...); diff != "" { + t.Fatalf("unexpected backend scan calls (-want, +got):\n%s\n", diff) + } + bmb.RUnlock() + }) + } +} + +func TestServer_CtlSvc_StorageScan(t *testing.T) { + ctrlr := storage.MockNvmeController() + ctrlr.SmdDevices = nil + ctrlrPB := proto.MockNvmeController() + ctrlrPB.HealthStats = nil + ctrlrPB.SmdDevices = nil + ctrlrPB2 := proto.MockNvmeController(2) + ctrlrPB2.HealthStats = nil + ctrlrPB2.SmdDevices = nil + ctrlrPBwHealth := proto.MockNvmeController() + ctrlrPBwHealth.SmdDevices = nil + ctrlrPBBasic := proto.MockNvmeController() + ctrlrPBBasic.HealthStats = nil + ctrlrPBBasic.SmdDevices = nil + ctrlrPBBasic.FwRev = "" + ctrlrPBBasic.Model = "" + + for name, tc := range map[string]struct { + req *ctlpb.StorageScanReq + bdevScanRes *ctlpb.ScanNvmeResp + bdevScanErr error + smbc *scm.MockBackendConfig + tierCfgs storage.TierConfigs + enginesNotReady bool + expResp *ctlpb.StorageScanResp + expErr error + }{ + "successful scan; scm namespaces": { + bdevScanRes: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + ctrlrPB, + ctrlrPB2, }, + State: new(ctlpb.ResponseState), + }, + smbc: &scm.MockBackendConfig{ + GetModulesRes: storage.ScmModules{storage.MockScmModule()}, + GetNamespacesRes: storage.ScmNamespaces{storage.MockScmNamespace()}, + }, + tierCfgs: storage.TierConfigs{ + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(ctrlr.PciAddr, test.MockPCIAddr(2)), }, expResp: &ctlpb.StorageScanResp{ Nvme: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - adjustNvmeSize(newCtrlrPBwMeta(1, 1, 2), mockPbScmMount0.AvailBytes, 8), - adjustNvmeSize(newCtrlrPBwMeta(2, 3, 4), mockPbScmMount1.AvailBytes, 8), + ctrlrPB, + ctrlrPB2, }, State: new(ctlpb.ResponseState), }, Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{ - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace0.Blockdev, - Dev: mockPbScmNamespace0.Dev, - Size: mockPbScmNamespace0.Size, - Uuid: mockPbScmNamespace0.Uuid, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount0.Class, - DeviceList: mockPbScmMount0.DeviceList, - Path: mockPbScmMount0.Path, - TotalBytes: mockPbScmMount0.TotalBytes, - AvailBytes: mockPbScmMount0.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount0.AvailBytes, false), - Rank: mockPbScmMount0.Rank, - }, - }, - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace1.Blockdev, - Dev: mockPbScmNamespace1.Dev, - Size: mockPbScmNamespace1.Size, - Uuid: mockPbScmNamespace1.Uuid, - NumaNode: mockPbScmNamespace1.NumaNode, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount1.Class, - DeviceList: mockPbScmMount1.DeviceList, - Path: mockPbScmMount1.Path, - TotalBytes: mockPbScmMount1.TotalBytes, - AvailBytes: mockPbScmMount1.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount1.AvailBytes, false), - Rank: mockPbScmMount1.Rank, - }, - }, - }, - State: new(ctlpb.ResponseState), + Namespaces: proto.ScmNamespaces{proto.MockScmNamespace()}, + State: new(ctlpb.ResponseState), }, MemInfo: proto.MockPBMemInfo(), }, }, - "scan scm usage": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: new(ctlpb.ScanNvmeReq), - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule(0)}, - GetNamespacesRes: storage.ScmNamespaces{storage.MockScmNamespace(0)}, - }, - smsc: &system.MockSysConfig{ - GetfsUsageResps: []system.GetfsUsageRetval{ - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, - }, - }, - }, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmDeviceList(mockPbScmNamespace0.Blockdev), + "successful scan; no scm namespaces": { + bdevScanRes: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + ctrlrPB, }, + State: new(ctlpb.ResponseState), }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: {}, + smbc: &scm.MockBackendConfig{ + GetModulesRes: storage.ScmModules{storage.MockScmModule()}, }, expResp: &ctlpb.StorageScanResp{ Nvme: &ctlpb.ScanNvmeResp{ - State: new(ctlpb.ResponseState), + Ctrlrs: proto.NvmeControllers{ctrlrPB}, + State: new(ctlpb.ResponseState), }, Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{ - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace0.Blockdev, - Dev: mockPbScmNamespace0.Dev, - Size: mockPbScmNamespace0.Size, - Uuid: mockPbScmNamespace0.Uuid, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount0.Class, - DeviceList: mockPbScmMount0.DeviceList, - Path: mockPbScmMount0.Path, - Rank: mockPbScmMount0.Rank, - TotalBytes: mockPbScmMount0.TotalBytes, - AvailBytes: mockPbScmMount0.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount0.AvailBytes, true), - }, - }, - }, - State: new(ctlpb.ResponseState), + Modules: proto.ScmModules{proto.MockScmModule()}, + State: new(ctlpb.ResponseState), }, MemInfo: proto.MockPBMemInfo(), }, }, - "scan scm usage; pmem not in instance device list": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: new(ctlpb.ScanNvmeReq), - }, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule(0)}, - GetNamespacesRes: storage.ScmNamespaces{storage.MockScmNamespace(0)}, - }, - smsc: &system.MockSysConfig{ - GetfsUsageResps: []system.GetfsUsageRetval{ - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, - }, + "successful scan; multiple bdev tiers in config": { + bdevScanRes: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + ctrlrPB, + ctrlrPB2, }, + State: new(ctlpb.ResponseState), }, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmDeviceList("/dev/foo", "/dev/bar"), - }, + smbc: &scm.MockBackendConfig{ + GetModulesRes: storage.ScmModules{storage.MockScmModule()}, }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: {}, + tierCfgs: storage.TierConfigs{ + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(test.MockPCIAddr(1)), + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(test.MockPCIAddr(2)), }, expResp: &ctlpb.StorageScanResp{ Nvme: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + ctrlrPB, + ctrlrPB2, + }, State: new(ctlpb.ResponseState), }, Scm: &ctlpb.ScanScmResp{ - State: &ctlpb.ResponseState{ - Status: ctlpb.ResponseStatus_CTL_ERR_SCM, - Error: "instance 0: no pmem namespace for mount /mnt/daos0", - }, + Modules: proto.ScmModules{proto.MockScmModule()}, + State: new(ctlpb.ResponseState), }, MemInfo: proto.MockPBMemInfo(), }, }, - "scan scm usage; class ram": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: new(ctlpb.ScanNvmeReq), + "spdk scan failure": { + bdevScanRes: &ctlpb.ScanNvmeResp{ + State: &ctlpb.ResponseState{ + Status: ctlpb.ResponseStatus_CTL_ERR_NVME, + Error: "spdk scan failed", + }, }, smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{storage.MockScmModule(0)}, - GetNamespacesRes: storage.ScmNamespaces{storage.MockScmNamespace(0)}, + GetModulesRes: storage.ScmModules{storage.MockScmModule()}, + GetNamespacesRes: storage.ScmNamespaces{storage.MockScmNamespace()}, }, - smsc: &system.MockSysConfig{ - GetfsUsageResps: []system.GetfsUsageRetval{ - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, + expResp: &ctlpb.StorageScanResp{ + Nvme: &ctlpb.ScanNvmeResp{ + State: &ctlpb.ResponseState{ + Error: "spdk scan failed", + Status: ctlpb.ResponseStatus_CTL_ERR_NVME, }, }, + Scm: &ctlpb.ScanScmResp{ + Namespaces: proto.ScmNamespaces{proto.MockScmNamespace()}, + State: new(ctlpb.ResponseState), + }, + MemInfo: proto.MockPBMemInfo(), }, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassRam.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmRamdiskSize(16), + }, + "scm module discovery failure": { + bdevScanRes: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + ctrlrPB, }, + State: new(ctlpb.ResponseState), }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: {}, + smbc: &scm.MockBackendConfig{ + GetModulesErr: errors.New("scm discover failed"), }, expResp: &ctlpb.StorageScanResp{ Nvme: &ctlpb.ScanNvmeResp{ - State: new(ctlpb.ResponseState), + Ctrlrs: proto.NvmeControllers{ctrlrPB}, + State: new(ctlpb.ResponseState), }, Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{ - &ctlpb.ScmNamespace{ - Blockdev: "ramdisk", - Size: uint64(humanize.GiByte * 16), - Mount: &ctlpb.ScmNamespace_Mount{ - Class: "ram", - Path: mockPbScmMount0.Path, - TotalBytes: mockPbScmMount0.TotalBytes, - AvailBytes: mockPbScmMount0.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount0.AvailBytes, true), - Rank: mockPbScmMount0.Rank, - }, - }, + State: &ctlpb.ResponseState{ + Error: "scm discover failed", + Status: ctlpb.ResponseStatus_CTL_ERR_SCM, }, - State: new(ctlpb.ResponseState), }, MemInfo: proto.MockPBMemInfo(), }, }, - "multi-engine; multi-tier; with usage": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1), newCtrlr(2)}, - eCtrlrs: []*storage.NvmeControllers{{newCtrlr(1)}, {newCtrlr(2)}}, - smbc: &scm.MockBackendConfig{ - GetModulesRes: storage.ScmModules{ - storage.MockScmModule(0), - }, - GetNamespacesRes: storage.ScmNamespaces{ - storage.MockScmNamespace(0), - storage.MockScmNamespace(1), - }, - }, - smsc: &system.MockSysConfig{ - GetfsUsageResps: []system.GetfsUsageRetval{ - { - Total: mockPbScmMount0.TotalBytes, - Avail: mockPbScmMount0.AvailBytes, - }, - { - Total: mockPbScmMount1.TotalBytes, - Avail: mockPbScmMount1.AvailBytes, - }, - }, - }, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmDeviceList(mockPbScmNamespace0.Blockdev), - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), - }, - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount1.Path). - WithScmDeviceList(mockPbScmNamespace1.Blockdev), - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(2).PciAddr), + "all discover fail": { + bdevScanRes: &ctlpb.ScanNvmeResp{ + State: &ctlpb.ResponseState{ + Status: ctlpb.ResponseStatus_CTL_ERR_NVME, + Error: "spdk scan failed", }, }, - engineTargetCount: []int{4, 4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1)}, - {Message: newBioHealthResp(1)}, - }, - 1: { - {Message: newSmdDevResp(2)}, - {Message: newBioHealthResp(2)}, - }, + smbc: &scm.MockBackendConfig{ + GetModulesErr: errors.New("scm discover failed"), }, expResp: &ctlpb.StorageScanResp{ Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ - adjustNvmeSize(newCtrlrPBwMeta(1), mockPbScmMount0.AvailBytes, 4), - adjustNvmeSize(newCtrlrPBwMeta(2), mockPbScmMount1.AvailBytes, 4), + State: &ctlpb.ResponseState{ + Error: "spdk scan failed", + Status: ctlpb.ResponseStatus_CTL_ERR_NVME, }, - State: new(ctlpb.ResponseState), }, Scm: &ctlpb.ScanScmResp{ - Namespaces: proto.ScmNamespaces{ - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace0.Blockdev, - Dev: mockPbScmNamespace0.Dev, - Size: mockPbScmNamespace0.Size, - Uuid: mockPbScmNamespace0.Uuid, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount0.Class, - DeviceList: mockPbScmMount0.DeviceList, - Path: mockPbScmMount0.Path, - TotalBytes: mockPbScmMount0.TotalBytes, - AvailBytes: mockPbScmMount0.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount0.AvailBytes, false), - Rank: mockPbScmMount0.Rank, - }, - }, - &ctlpb.ScmNamespace{ - Blockdev: mockPbScmNamespace1.Blockdev, - Dev: mockPbScmNamespace1.Dev, - Size: mockPbScmNamespace1.Size, - Uuid: mockPbScmNamespace1.Uuid, - NumaNode: mockPbScmNamespace1.NumaNode, - Mount: &ctlpb.ScmNamespace_Mount{ - Class: mockPbScmMount1.Class, - DeviceList: mockPbScmMount1.DeviceList, - Path: mockPbScmMount1.Path, - TotalBytes: mockPbScmMount1.TotalBytes, - AvailBytes: mockPbScmMount1.AvailBytes, - UsableBytes: adjustScmSize(mockPbScmMount1.AvailBytes, false), - Rank: mockPbScmMount1.Rank, - }, - }, + State: &ctlpb.ResponseState{ + Error: "scm discover failed", + Status: ctlpb.ResponseStatus_CTL_ERR_SCM, }, - State: new(ctlpb.ResponseState), }, MemInfo: proto.MockPBMemInfo(), }, }, - "multi-engine; multi-tier; with usage; engines not ready": { - req: &ctlpb.StorageScanReq{ - Scm: &ctlpb.ScanScmReq{Usage: true}, - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount0.Path). - WithScmDeviceList(mockPbScmNamespace0.Blockdev), - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), - }, - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassDcpm.String()). - WithScmMountPoint(mockPbScmMount1.Path). - WithScmDeviceList(mockPbScmNamespace1.Blockdev), - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(2).PciAddr), - }, - }, - engineTargetCount: []int{4, 4}, - enginesNotReady: true, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1)}, - {Message: newBioHealthResp(1)}, - }, - 1: { - {Message: newSmdDevResp(2)}, - {Message: newBioHealthResp(2)}, - }, - }, - expErr: errEngineNotReady, - }, - // Sometimes when more than a few ssds are assigned to engine without many targets, - // some of the smd entries for the latter ssds are in state "NEW" rather than - // "NORMAL", when in this state, health is unavailable and DER_NONEXIST is returned. - "bdev scan; meta; new state; non-existent smd health": { - req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1)}, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), + "scan bdev; vmd enabled": { + bdevScanRes: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, }, + State: new(ctlpb.ResponseState), }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: smdDevRespStateNew}, - { - Message: &ctlpb.BioHealthResp{ - Status: int32(daos.Nonexistent), - }, - }, - }, + tierCfgs: storage.TierConfigs{ + storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList("0000:05:05.5"), }, expResp: &ctlpb.StorageScanResp{ Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPBwMetaNew}, - State: new(ctlpb.ResponseState), - }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - MemInfo: proto.MockPBMemInfo(), - }, - }, - "bdev scan; meta; new state; nomem smd health": { - req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1)}, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), - }, - }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: smdDevRespStateNew}, - { - Message: &ctlpb.BioHealthResp{ - Status: int32(daos.FreeMemError), - }, + Ctrlrs: proto.NvmeControllers{ + &ctlpb.NvmeController{PciAddr: "050505:01:00.0"}, }, + State: new(ctlpb.ResponseState), }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPBwMetaNew}, - State: new(ctlpb.ResponseState), + Scm: &ctlpb.ScanScmResp{ + State: new(ctlpb.ResponseState), }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, MemInfo: proto.MockPBMemInfo(), }, }, - "bdev scan; meta; normal state; non-existent smd health": { + "scan usage": { req: &ctlpb.StorageScanReq{ - Scm: new(ctlpb.ScanScmReq), - Nvme: &ctlpb.ScanNvmeReq{Meta: true}, - }, - csCtrlrs: &storage.NvmeControllers{newCtrlr(1)}, - storageCfgs: []storage.TierConfigs{ - { - storage.NewTierConfig(). - WithStorageClass(storage.ClassNvme.String()). - WithBdevDeviceList(newCtrlr(1).PciAddr), - }, - }, - engineTargetCount: []int{4}, - drpcResps: map[int][]*mockDrpcResponse{ - 0: { - {Message: newSmdDevResp(1)}, - { - Message: &ctlpb.BioHealthResp{ - Status: int32(daos.Nonexistent), - }, - }, + Scm: &ctlpb.ScanScmReq{ + Usage: true, }, - }, - expResp: &ctlpb.StorageScanResp{ - Nvme: &ctlpb.ScanNvmeResp{ - Ctrlrs: proto.NvmeControllers{ctrlrPBwMetaNormal}, - State: new(ctlpb.ResponseState), + Nvme: &ctlpb.ScanNvmeReq{ + Meta: true, }, - Scm: &ctlpb.ScanScmResp{State: new(ctlpb.ResponseState)}, - MemInfo: proto.MockPBMemInfo(), }, + enginesNotReady: true, + expErr: errEngineNotReady, }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) - if len(tc.storageCfgs) != len(tc.drpcResps) { - t.Fatalf("number of tc.storageCfgs doesn't match num drpc msg groups") - } - - if len(tc.storageCfgs) == 1 && tc.eCtrlrs == nil && tc.csCtrlrs != nil { - log.Debugf("using control service storage provider for first engine") - tc.eCtrlrs = []*storage.NvmeControllers{tc.csCtrlrs} - } - - var csbmbc *bdev.MockBackendConfig - if tc.csCtrlrs != nil { - log.Debugf("bdevs %v to be returned for control service scan", *tc.csCtrlrs) - csbmbc = &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{Controllers: *tc.csCtrlrs}, - } - } - - var engineCfgs []*engine.Config - for i, sc := range tc.storageCfgs { - log.Debugf("storage cfg contains bdevs %v for engine %d", sc.Bdevs(), i) - engineCfgs = append(engineCfgs, - engine.MockConfig(). - WithStorage(sc...). - WithTargetCount(tc.engineTargetCount[i])) - } + engineCfg := engine.MockConfig().WithStorage(tc.tierCfgs...) + engineCfgs := []*engine.Config{engineCfg} sCfg := config.DefaultServer().WithEngines(engineCfgs...) - cs := mockControlService(t, log, sCfg, csbmbc, tc.smbc, tc.smsc) - - // In production, during server/server.go:srv.addEngines() and after - // srv.createEngine(), engine.storage.SetBdevCache() is called to load the - // results of the start-up bdev scan from the control service storage - // provider into the engine's storage provider. The control service and - // each of the engines have distinct storage provider instances so cached - // cached results have to be explicitly shared so results are available when - // engines are up. - - for idx, ec := range engineCfgs { - var ebmbc *bdev.MockBackendConfig - if tc.eCtrlrs != nil && len(tc.eCtrlrs) > idx { - log.Debugf("bdevs %v to be returned for engine %d scan", - *tc.eCtrlrs[idx], idx) - ebmbc = &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: *tc.eCtrlrs[idx], - }, - } - } - - // replace harness instance with mock I/O Engine - // to enable mocking of harness instance drpc channel - sp := storage.MockProvider(log, idx, &ec.Storage, - cs.storage.Sys, // share system provider cfo - scm.NewMockProvider(log, tc.smbc, nil), - bdev.NewMockProvider(log, ebmbc), nil) - if tc.eCtrlrs != nil && len(tc.eCtrlrs) > idx { - sp.SetBdevCache(storage.BdevScanResponse{ - Controllers: *tc.eCtrlrs[idx], - }) - } - te := newTestEngine(log, false, sp, ec) - if tc.enginesNotReady { - te.ready.SetFalse() - } - - // mock drpc responses - dcc := new(mockDrpcClientConfig) - if tc.junkResp { - dcc.setSendMsgResponse(drpc.Status_SUCCESS, - makeBadBytes(42), nil) - } else if len(tc.drpcResps) > idx { - t.Logf("setting %d drpc responses for engine %d", - len(tc.drpcResps[idx]), idx) - dcc.setSendMsgResponseList(t, tc.drpcResps[idx]...) - } else { - t.Fatal("drpc response mocks unpopulated") - } - te.setDrpcClient(newMockDrpcClient(dcc)) - te._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1)) - for _, tc := range te.storage.GetBdevConfigs() { - tc.Bdev.DeviceRoles.OptionBits = storage.OptionBits(storage.BdevRoleAll) - } - md := te.storage.GetControlMetadata() - md.Path = "/foo" - md.DevicePath = md.Path + var cs *ControlService + if tc.enginesNotReady { + cs = mockControlService(t, log, sCfg, nil, tc.smbc, nil, true) + } else { + cs = mockControlService(t, log, sCfg, nil, tc.smbc, nil) + } - cs.harness.instances[idx] = te + scanBdevs = func(_ context.Context, c *ControlService, _ *ctlpb.ScanNvmeReq, _ []*ctlpb.ScmNamespace) (*ctlpb.ScanNvmeResp, error) { + return tc.bdevScanRes, tc.bdevScanErr } - cs.harness.started.SetTrue() + defer func() { + scanBdevs = bdevScan + }() if tc.req == nil { tc.req = &ctlpb.StorageScanReq{ @@ -1646,14 +726,6 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) { } } - if tc.scanTwice { - _, err := cs.StorageScan(test.Context(t), tc.req) - test.CmpErr(t, tc.expErr, err) - if err != nil { - return - } - } - resp, err := cs.StorageScan(test.Context(t), tc.req) test.CmpErr(t, tc.expErr, err) if err != nil { @@ -2326,10 +1398,10 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { // Mimic control service start-up and engine creation where cache is shared // to the engines from the base control service storage provider. - nvmeScanResp, err := cs.NvmeScan(storage.BdevScanRequest{}) - if err != nil { - t.Fatal(err) - } + // nvmeScanResp, err := cs.NvmeScan(storage.BdevScanRequest{}) + // if err != nil { + // t.Fatal(err) + // } for i, ec := range config.Engines { root := filepath.Dir(tc.sMounts[i]) @@ -2349,7 +1421,7 @@ func TestServer_CtlSvc_StorageFormat(t *testing.T) { ei := NewEngineInstance(log, storProv, nil, runner) ei.ready.Store(tc.instancesStarted) - ei.storage.SetBdevCache(*nvmeScanResp) + //ei.storage.SetBdevCache(*nvmeScanResp) // if the instance is expected to have a valid superblock, create one if tc.superblockExists { diff --git a/src/control/server/ctl_svc_test.go b/src/control/server/ctl_svc_test.go index 11995e06671f..cb87c590788e 100644 --- a/src/control/server/ctl_svc_test.go +++ b/src/control/server/ctl_svc_test.go @@ -22,27 +22,20 @@ import ( "github.com/daos-stack/daos/src/control/server/storage/scm" ) -// mockControlService takes cfgs for tuneable scm and sys provider behavior but -// default nvmeStorage behavior (cs.nvoe can be subsequently replaced in test). -func mockControlService(t *testing.T, log logging.Logger, cfg *config.Server, bmbc *bdev.MockBackendConfig, smbc *scm.MockBackendConfig, smsc *system.MockSysConfig, notStarted ...bool) *ControlService { +func newMockControlServiceFromBackends(t *testing.T, log logging.Logger, cfg *config.Server, bmb *bdev.MockBackend, smb *scm.MockBackend, smsc *system.MockSysConfig, notStarted ...bool) *ControlService { t.Helper() - started := true - if len(notStarted) > 0 && notStarted[0] { - started = false - } - if cfg == nil { cfg = config.DefaultServer().WithEngines(engine.MockConfig().WithTargetCount(1)) } - // share sys provider between engines to be able to access to same mock config data - sysProv := system.NewMockSysProvider(log, smsc) - mounter := mount.NewProvider(log, sysProv) - scmProv := scm.NewProvider(log, scm.NewMockBackend(smbc), sysProv, mounter) - bdevProv := bdev.NewMockProvider(log, bmbc) + // Share sys provider between engines to be able to access to same mock config data. + bp := bdev.NewProvider(log, bmb) + syp := system.NewMockSysProvider(log, smsc) + mp := mount.NewProvider(log, syp) + sp := scm.NewProvider(log, smb, syp, mp) - mscs := NewMockStorageControlService(log, cfg.Engines, sysProv, scmProv, bdevProv, nil) + mscs := NewMockStorageControlService(log, cfg.Engines, syp, sp, bp, nil) cs := &ControlService{ StorageControlService: *mscs, @@ -51,20 +44,40 @@ func mockControlService(t *testing.T, log logging.Logger, cfg *config.Server, bm srvCfg: cfg, } + started := make([]bool, len(cfg.Engines)) + for idx := range started { + started[idx] = true + } + switch len(notStarted) { + case 0: // Not specified so start all engines. + case 1: + if notStarted[0] { + // If single true notStarted bool, don't start any engines. + for idx := range started { + started[idx] = false + } + } + case len(cfg.Engines): // One notStarted bool specified for each engine. + for idx := range started { + started[idx] = !notStarted[idx] + } + default: + t.Fatal("len notStarted != len cfg.Engines") + } + for idx, ec := range cfg.Engines { trc := new(engine.TestRunnerConfig) - if started { + if started[idx] { trc.Running.SetTrue() } runner := engine.NewTestRunner(trc, ec) - storProv := storage.MockProvider(log, 0, &ec.Storage, sysProv, scmProv, bdevProv, - nil) + storProv := storage.MockProvider(log, 0, &ec.Storage, syp, sp, bp, nil) ei := NewEngineInstance(log, storProv, nil, runner) ei.setSuperblock(&Superblock{ Rank: ranklist.NewRankPtr(uint32(idx)), }) - if started { + if started[idx] { ei.ready.SetTrue() } if err := cs.harness.AddInstance(ei); err != nil { @@ -74,3 +87,14 @@ func mockControlService(t *testing.T, log logging.Logger, cfg *config.Server, bm return cs } + +// mockControlService takes cfgs for tuneable scm and sys provider behavior but +// default nvmeStorage behavior. +func mockControlService(t *testing.T, log logging.Logger, cfg *config.Server, bmbc *bdev.MockBackendConfig, smbc *scm.MockBackendConfig, smsc *system.MockSysConfig, notStarted ...bool) *ControlService { + t.Helper() + + bmb := bdev.NewMockBackend(bmbc) + smb := scm.NewMockBackend(smbc) + + return newMockControlServiceFromBackends(t, log, cfg, bmb, smb, smsc, notStarted...) +} diff --git a/src/control/server/harness.go b/src/control/server/harness.go index f27febc1dceb..fc0fb97f2323 100644 --- a/src/control/server/harness.go +++ b/src/control/server/harness.go @@ -37,7 +37,6 @@ type Engine interface { newCret(string, error) *ctlpb.NvmeControllerResult tryDrpc(context.Context, drpc.Method) *system.MemberResult requestStart(context.Context) - updateInUseBdevs(context.Context, []storage.NvmeController, uint64, uint64) ([]storage.NvmeController, error) isAwaitingFormat() bool // These methods should probably be replaced by callbacks. @@ -48,7 +47,6 @@ type Engine interface { // These methods should probably be refactored out into functions that // accept the engine instance as a parameter. GetBioHealth(context.Context, *ctlpb.BioHealthReq) (*ctlpb.BioHealthResp, error) - ScanBdevTiers() ([]storage.BdevTierScanResult, error) ListSmdDevices(context.Context, *ctlpb.SmdDevReq) (*ctlpb.SmdDevResp, error) StorageFormatSCM(context.Context, bool) *ctlpb.ScmMountResult StorageFormatNVMe() commonpb.NvmeControllerResults diff --git a/src/control/server/instance_drpc.go b/src/control/server/instance_drpc.go index 542b636e2f07..c915577ee7fc 100644 --- a/src/control/server/instance_drpc.go +++ b/src/control/server/instance_drpc.go @@ -11,19 +11,16 @@ import ( "fmt" "time" - "github.com/dustin/go-humanize" "github.com/pkg/errors" "google.golang.org/protobuf/proto" "github.com/daos-stack/daos/src/control/build" - "github.com/daos-stack/daos/src/control/common/proto/convert" ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" srvpb "github.com/daos-stack/daos/src/control/common/proto/srv" "github.com/daos-stack/daos/src/control/drpc" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/ranklist" - "github.com/daos-stack/daos/src/control/server/storage" "github.com/daos-stack/daos/src/control/system" ) @@ -211,105 +208,20 @@ func (ei *EngineInstance) ListSmdDevices(ctx context.Context, req *ctlpb.SmdDevR return resp, nil } -func (ei *EngineInstance) getSmdDetails(smd *ctlpb.SmdDevice) (*storage.SmdDevice, error) { - smdDev := new(storage.SmdDevice) - if err := convert.Types(smd, smdDev); err != nil { - return nil, errors.Wrap(err, "convert smd") - } - - engineRank, err := ei.GetRank() +func listNvmeDevices(ctx context.Context, ei *EngineInstance, req *ctlpb.ScanNvmeReq) (*ctlpb.ScanNvmeResp, error) { + dresp, err := ei.CallDrpc(ctx, drpc.MethodNvmeDevs, req) if err != nil { - return nil, errors.Wrapf(err, "get rank") - } - - smdDev.Rank = engineRank - smdDev.TrAddr = smd.GetTrAddr() - - return smdDev, nil -} - -// updateInUseBdevs updates-in-place the input list of controllers with new NVMe health stats and -// SMD metadata info. -// -// Query each SmdDevice on each I/O Engine instance for health stats and update existing controller -// data in ctrlrMap using PCI address key. -func (ei *EngineInstance) updateInUseBdevs(ctx context.Context, ctrlrs []storage.NvmeController, ms uint64, rs uint64) ([]storage.NvmeController, error) { - ctrlrMap := make(map[string]*storage.NvmeController) - for idx, ctrlr := range ctrlrs { - if _, exists := ctrlrMap[ctrlr.PciAddr]; exists { - return nil, errors.Errorf("duplicate entries for controller %s", - ctrlr.PciAddr) - } - - // Clear SMD info for controllers to remove stale stats. - ctrlrs[idx].SmdDevices = []*storage.SmdDevice{} - // Update controllers in input slice through map by reference. - ctrlrMap[ctrlr.PciAddr] = &ctrlrs[idx] + return nil, err } - smdDevs, err := ei.ListSmdDevices(ctx, new(ctlpb.SmdDevReq)) - if err != nil { - return nil, errors.Wrapf(err, "list smd devices") + resp := new(ctlpb.ScanNvmeResp) + if err = proto.Unmarshal(dresp.Body, resp); err != nil { + return nil, errors.Wrap(err, "unmarshal NVMeListDevs response") } - ei.log.Debugf("engine %d: smdDevs %+v", ei.Index(), smdDevs) - - hasUpdatedHealth := make(map[string]bool) - for _, smd := range smdDevs.Devices { - msg := fmt.Sprintf("instance %d: smd %s: ctrlr %s", ei.Index(), smd.Uuid, - smd.TrAddr) - - ctrlr, exists := ctrlrMap[smd.GetTrAddr()] - if !exists { - ei.log.Errorf("%s: ctrlr not found", msg) - continue - } - smdDev, err := ei.getSmdDetails(smd) - if err != nil { - return nil, errors.Wrapf(err, "%s: collect smd info", msg) - } - smdDev.MetaSize = ms - smdDev.RdbSize = rs - - pbStats, err := ei.GetBioHealth(ctx, &ctlpb.BioHealthReq{DevUuid: smdDev.UUID, MetaSize: ms, RdbSize: rs}) - if err != nil { - // Log the error if it indicates non-existent health and the SMD entity has - // an abnormal state. Otherwise it is expected that health may be missing. - status, ok := errors.Cause(err).(daos.Status) - if ok && status == daos.Nonexistent && smdDev.NvmeState != storage.NvmeStateNormal { - ei.log.Debugf("%s: stats not found (device state: %q), skip update", - msg, smdDev.NvmeState.String()) - } else { - ei.log.Errorf("%s: fetch stats: %s", msg, err.Error()) - } - ctrlr.UpdateSmd(smdDev) - continue - } + // if resp.Status != 0 { + // return nil, errors.Wrap(daos.Status(resp.Status), "ListNVMeDevices failed") + // } - // Populate space usage for each SMD device from health stats. - smdDev.TotalBytes = pbStats.TotalBytes - smdDev.AvailBytes = pbStats.AvailBytes - smdDev.ClusterSize = pbStats.ClusterSize - smdDev.MetaWalSize = pbStats.MetaWalSize - smdDev.RdbWalSize = pbStats.RdbWalSize - msg = fmt.Sprintf("%s: smd usage = %s/%s", msg, humanize.Bytes(smdDev.AvailBytes), - humanize.Bytes(smdDev.TotalBytes)) - ctrlr.UpdateSmd(smdDev) - - // Multiple SMD entries for the same address key may exist when there are multiple - // NVMe namespaces (and resident blobstores) exist on a single controller. In this - // case only update once as health stats will be the same for each. - if hasUpdatedHealth[ctrlr.PciAddr] { - continue - } - ctrlr.HealthStats = new(storage.NvmeHealth) - if err := convert.Types(pbStats, ctrlr.HealthStats); err != nil { - ei.log.Errorf("%s: update ctrlr health: %s", msg, err.Error()) - continue - } - ei.log.Debugf("%s: ctrlr health updated", msg) - hasUpdatedHealth[ctrlr.PciAddr] = true - } - - return ctrlrs, nil + return resp, nil } diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index 2cc4f1f54432..c52e45e4726f 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -14,11 +14,14 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/build" + ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/events" "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/server/storage" ) +var scanEngineBdevsOverDrpc = listNvmeDevices + // GetStorage retrieve the storage provider for an engine instance. func (ei *EngineInstance) GetStorage() *storage.Provider { return ei.storage @@ -169,15 +172,38 @@ func (ei *EngineInstance) logScmStorage() error { return nil } -// ScanBdevTiers calls in to the private engine storage provider to scan bdev -// tiers. Scan will avoid using any cached results if direct is set to true. -func (ei *EngineInstance) ScanBdevTiers() ([]storage.BdevTierScanResult, error) { - isUp := ei.IsReady() +// bdevScanEngine calls either in to the private engine storage provider to scan bdevs if engine process +// is not started, otherwise dRPC is used to retrieve details from the online engine. +func bdevScanEngine(ctx context.Context, engine Engine, pbReq *ctlpb.ScanNvmeReq) (*ctlpb.ScanNvmeResp, error) { + ei, ok := engine.(*EngineInstance) + if !ok { + return nil, errors.New("not EngineInstance") + } + + if pbReq == nil { + return nil, errors.New("nil request") + } + + isUp := ei.IsStarted() upDn := "down" if isUp { upDn = "up" } ei.log.Debugf("scanning engine-%d bdev tiers while engine is %s", ei.Index(), upDn) - return ei.storage.ScanBdevTiers(!isUp) + if isUp { + return scanEngineBdevsOverDrpc(ctx, ei, pbReq) + } + + // TODO: should anything be passed from pbReq here e.g. Meta/Health specifiers? + + // Retrieve engine cfg bdevs to restrict scan scope. + req := storage.BdevScanRequest{ + DeviceList: ei.runner.GetConfig().Storage.GetBdevs(), + } + if req.DeviceList.Len() == 0 { + return nil, errors.Errorf("empty device list for engine instance %d", ei.Index()) + } + + return bdevScanToProtoResp(ei.storage.ScanBdevs, req) } diff --git a/src/control/server/instance_storage_test.go b/src/control/server/instance_storage_test.go index 2bbc049bd652..552f42023ea7 100644 --- a/src/control/server/instance_storage_test.go +++ b/src/control/server/instance_storage_test.go @@ -1,5 +1,5 @@ // -// (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2020-2023 Intel Corporation. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -17,13 +17,17 @@ import ( "github.com/google/go-cmp/cmp" "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/common/proto" + ctlpb "github.com/daos-stack/daos/src/control/common/proto/ctl" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/events" "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" "github.com/daos-stack/daos/src/control/provider/system" + "github.com/daos-stack/daos/src/control/server/config" "github.com/daos-stack/daos/src/control/server/engine" "github.com/daos-stack/daos/src/control/server/storage" + "github.com/daos-stack/daos/src/control/server/storage/bdev" "github.com/daos-stack/daos/src/control/server/storage/scm" ) @@ -443,3 +447,134 @@ func TestIOEngineInstance_awaitStorageReady(t *testing.T) { }) } } + +func TestIOEngineInstance_bdevScanEngine(t *testing.T) { + for name, tc := range map[string]struct { + req ctlpb.ScanNvmeReq + bdevAddrs []string + provRes *storage.BdevScanResponse + provErr error + engStopped bool + engRes *ctlpb.ScanNvmeResp + engErr error + expResp *ctlpb.ScanNvmeResp + expErr error + expBackendScanCall *storage.BdevScanRequest + }{ + "scan over drpc": { + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(2), + }, + State: new(ctlpb.ResponseState), + }, + }, + "scan fails over drpc": { + engErr: errors.New("drpc fail"), + expErr: errors.New("drpc fail"), + }, + "scan over engine provider; no bdevs in config": { + engStopped: true, + expErr: errors.New("empty device list"), + }, + "scan over engine provider; bdevs in config": { + bdevAddrs: []string{test.MockPCIAddr(1), test.MockPCIAddr(2)}, + engStopped: true, + expResp: &ctlpb.ScanNvmeResp{ + Ctrlrs: proto.NvmeControllers{ + proto.MockNvmeController(1), + }, + State: new(ctlpb.ResponseState), + }, + expBackendScanCall: &storage.BdevScanRequest{ + DeviceList: storage.MustNewBdevDeviceList(test.MockPCIAddr(1), + test.MockPCIAddr(2)), + }, + }, + "scan fails over engine provider": { + bdevAddrs: []string{test.MockPCIAddr(1), test.MockPCIAddr(2)}, + engStopped: true, + provErr: errors.New("provider scan fail"), + expErr: errors.New("provider scan fail"), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + scanEngineBdevsOverDrpc = func(_ context.Context, _ *EngineInstance, _ *ctlpb.ScanNvmeReq) (*ctlpb.ScanNvmeResp, error) { + return tc.engRes, tc.engErr + } + defer func() { + scanEngineBdevsOverDrpc = listNvmeDevices + }() + + if tc.provRes == nil { + tc.provRes = defProviderScanRes + } + if tc.engRes == nil { + tc.engRes = defEngineScanRes + } + + ec := engine.MockConfig() + if tc.bdevAddrs != nil { + ec.WithStorage(storage.NewTierConfig(). + WithStorageClass(storage.ClassNvme.String()). + WithBdevDeviceList(tc.bdevAddrs...)) + } + + sCfg := config.DefaultServer().WithEngines(ec) + + bmbc := &bdev.MockBackendConfig{ + ScanRes: tc.provRes, + ScanErr: tc.provErr, + } + bmb := bdev.NewMockBackend(bmbc) + smb := scm.NewMockBackend(nil) + + cs := newMockControlServiceFromBackends(t, log, sCfg, bmb, smb, nil, + tc.engStopped) + + resp, err := bdevScanEngine(test.Context(t), cs.harness.Instances()[0], + &tc.req) + test.CmpErr(t, tc.expErr, err) + if err != nil { + return + } + + if diff := cmp.Diff(tc.expResp, resp, + defStorageScanCmpOpts...); diff != "" { + t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) + } + + cmpopt := cmp.Comparer(func(x, y *storage.BdevDeviceList) bool { + if x == nil && y == nil { + return true + } + return x.Equals(y) + }) + + bmb.RLock() + switch len(bmb.ScanCalls) { + case 0: + if tc.expBackendScanCall == nil { + return + } + t.Fatalf("unexpected number of backend scan calls, want 1 got 0") + case 1: + if tc.expBackendScanCall != nil { + break + } + t.Fatalf("unexpected number of backend scan calls, want 0 got 1") + default: + t.Fatalf("unexpected number of backend scan calls, want 0-1 got %d", + len(bmb.ScanCalls)) + } + if diff := cmp.Diff(*tc.expBackendScanCall, bmb.ScanCalls[0], + append(defStorageScanCmpOpts, cmpopt)...); diff != "" { + t.Fatalf("unexpected backend scan calls (-want, +got):\n%s\n", diff) + } + bmb.RUnlock() + }) + } +} diff --git a/src/control/server/server.go b/src/control/server/server.go index c8812e3b9871..27e7341f6e7e 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -308,29 +308,16 @@ func (srv *server) addEngines(ctx context.Context) error { return err } - // Retrieve NVMe device details (before engines are started) so static details can be - // recovered by the engine storage provider(s) during scan even if devices are in use. - nvmeScanResp, err := scanBdevStorage(srv) - if err != nil { - return err - } - if len(srv.cfg.Engines) == 0 { return nil } - nrEngineBdevsIdx := -1 - nrEngineBdevs := -1 for i, c := range srv.cfg.Engines { engine, err := srv.createEngine(ctx, i, c) if err != nil { return errors.Wrap(err, "creating engine instances") } - if err := setEngineBdevs(engine, nvmeScanResp, &nrEngineBdevsIdx, &nrEngineBdevs); err != nil { - return errors.Wrap(err, "setting engine bdevs") - } - registerEngineEventCallbacks(srv, engine, &allStarted) if err := srv.harness.AddInstance(engine); err != nil { diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index 6e059aef32ec..596bb0c50a99 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -355,77 +355,6 @@ func prepBdevStorage(srv *server, iommuEnabled bool) error { return nil } -// scanBdevStorage performs discovery and validates existence of configured NVMe SSDs. -func scanBdevStorage(srv *server) (*storage.BdevScanResponse, error) { - defer srv.logDuration(track("time to scan bdev storage")) - - if srv.cfg.DisableHugepages { - srv.log.Debugf("skip nvme scan as hugepages have been disabled in config") - return &storage.BdevScanResponse{}, nil - } - - nvmeScanResp, err := srv.ctlSvc.NvmeScan(storage.BdevScanRequest{ - DeviceList: getBdevCfgsFromSrvCfg(srv.cfg).Bdevs(), - }) - if err != nil { - err = errors.Wrap(err, "NVMe Scan Failed") - srv.log.Errorf("%s", err) - return nil, err - } - - return nvmeScanResp, nil -} - -func setEngineBdevs(engine *EngineInstance, scanResp *storage.BdevScanResponse, lastEngineIdx, lastBdevCount *int) error { - badInput := "" - switch { - case engine == nil: - badInput = "engine" - case scanResp == nil: - badInput = "scanResp" - case lastEngineIdx == nil: - badInput = "lastEngineIdx" - case lastBdevCount == nil: - badInput = "lastBdevCount" - } - if badInput != "" { - return errors.New("nil input param: " + badInput) - } - - if err := engine.storage.SetBdevCache(*scanResp); err != nil { - return errors.Wrap(err, "setting engine storage bdev cache") - } - - // After engine's bdev cache has been set, the cache will only contain details of bdevs - // identified in the relevant engine config and device addresses will have been verified - // against NVMe scan results. As any VMD endpoint addresses will have been replaced with - // backing device addresses, device counts will reflect the number of physical (as opposed - // to logical) bdevs and engine bdev counts can be accurately compared. - - eIdx := engine.Index() - bdevCache := engine.storage.GetBdevCache() - newNrBdevs := len(bdevCache.Controllers) - - // Update last recorded counters if this is the first update or if the number of bdevs is - // unchanged. If bdev count differs between engines, return fault. - switch { - case *lastEngineIdx < 0: - if *lastBdevCount >= 0 { - return errors.New("expecting both lastEngineIdx and lastBdevCount to be unset") - } - *lastEngineIdx = int(eIdx) - *lastBdevCount = newNrBdevs - case *lastBdevCount < 0: - return errors.New("expecting both lastEngineIdx and lastBdevCount to be set") - case newNrBdevs == *lastBdevCount: - *lastEngineIdx = int(eIdx) - default: - return config.FaultConfigBdevCountMismatch(int(eIdx), newNrBdevs, *lastEngineIdx, *lastBdevCount) - } - - return nil -} - func setDaosHelperEnvs(cfg *config.Server, setenv func(k, v string) error) error { if cfg.HelperLogFile != "" { if err := setenv(pbin.DaosPrivHelperLogFileEnvVar, cfg.HelperLogFile); err != nil { diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 1fb0567fadd8..7fd1ac73859f 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -842,183 +842,6 @@ func TestServer_checkEngineTmpfsMem(t *testing.T) { } } -// TestServer_scanBdevStorage validates that an error is returned in the case that a SSD is not -// found and doesn't return an error if SPDK fails to init. -func TestServer_scanBdevStorage(t *testing.T) { - for name, tc := range map[string]struct { - disableHugepages bool - bmbc *bdev.MockBackendConfig - expErr error - }{ - "spdk fails init": { - bmbc: &bdev.MockBackendConfig{ - ScanErr: errors.New("spdk failed"), - }, - expErr: errors.New("spdk failed"), - }, - "bdev in config not found by spdk": { - bmbc: &bdev.MockBackendConfig{ - ScanErr: storage.FaultBdevNotFound(test.MockPCIAddr()), - }, - expErr: storage.FaultBdevNotFound(test.MockPCIAddr()), - }, - "successful scan": { - bmbc: &bdev.MockBackendConfig{ - ScanRes: &storage.BdevScanResponse{ - Controllers: storage.MockNvmeControllers(1), - }, - }, - }, - "hugepages disabled": { - disableHugepages: true, - bmbc: &bdev.MockBackendConfig{ - ScanErr: errors.New("spdk failed"), - }, - }, - } { - t.Run(name, func(t *testing.T) { - log, buf := logging.NewTestLogger(name) - defer test.ShowBufferOnFailure(t, buf) - - cfg := config.DefaultServer().WithFabricProvider("ofi+verbs"). - WithDisableHugepages(tc.disableHugepages) - - if err := cfg.Validate(log); err != nil { - t.Fatal(err) - } - - srv, err := newServer(log, cfg, &system.FaultDomain{}) - if err != nil { - t.Fatal(err) - } - - mbb := bdev.NewMockBackend(tc.bmbc) - mbp := bdev.NewProvider(log, mbb) - sp := sysprov.NewMockSysProvider(log, nil) - - srv.ctlSvc = &ControlService{ - StorageControlService: *NewMockStorageControlService(log, cfg.Engines, - sp, - scm.NewProvider(log, scm.NewMockBackend(nil), sp, nil), - mbp, nil), - srvCfg: cfg, - } - - _, gotErr := scanBdevStorage(srv) - test.CmpErr(t, tc.expErr, gotErr) - }) - } -} - -func TestServer_setEngineBdevs(t *testing.T) { - for name, tc := range map[string]struct { - cfg engine.Config - engineIdx uint32 - scanResp *storage.BdevScanResponse - lastEngineIdx int - lastBdevCount int - expErr error - expLastEngineIdx int - expLastBdevCount int - }{ - "nil input": { - expErr: errors.New("nil input param: scanResp"), - }, - "empty cache": { - scanResp: &storage.BdevScanResponse{}, - lastEngineIdx: -1, - lastBdevCount: -1, - }, - "index unset; bdev count set": { - scanResp: &storage.BdevScanResponse{}, - lastEngineIdx: -1, - lastBdevCount: 0, - expErr: errors.New("to be unset"), - }, - "index set; bdev count unset": { - scanResp: &storage.BdevScanResponse{}, - lastEngineIdx: 0, - lastBdevCount: -1, - expErr: errors.New("to be set"), - }, - "empty cache; counts match": { - engineIdx: 1, - scanResp: &storage.BdevScanResponse{}, - lastEngineIdx: 0, - lastBdevCount: 0, - expLastEngineIdx: 1, - }, - "empty cache; count mismatch": { - engineIdx: 1, - scanResp: &storage.BdevScanResponse{}, - lastEngineIdx: 0, - lastBdevCount: 1, - expErr: errors.New("engine 1 has 0 but engine 0 has 1"), - }, - "populated cache; cache miss": { - engineIdx: 1, - scanResp: &storage.BdevScanResponse{Controllers: storage.MockNvmeControllers(1)}, - lastEngineIdx: 0, - lastBdevCount: 1, - expErr: errors.New("engine 1 has 0 but engine 0 has 1"), - }, - "populated cache; cache hit": { - cfg: *engine.MockConfig(). - WithStorage( - storage.NewTierConfig(). - WithStorageClass("nvme"). - WithBdevDeviceList("0000:00:00.0"), - ), - engineIdx: 1, - scanResp: &storage.BdevScanResponse{Controllers: storage.MockNvmeControllers(1)}, - lastEngineIdx: 0, - lastBdevCount: 1, - expLastEngineIdx: 1, - expLastBdevCount: 1, - }, - "populated cache; multiple vmd backing devices": { - cfg: *engine.MockConfig(). - WithStorage( - storage.NewTierConfig(). - WithStorageClass("nvme"). - WithBdevDeviceList("0000:05:05.5", "0000:5d:05.5"), - ), - engineIdx: 1, - scanResp: &storage.BdevScanResponse{ - Controllers: storage.NvmeControllers{ - &storage.NvmeController{PciAddr: "5d0505:01:00.0"}, - &storage.NvmeController{PciAddr: "5d0505:03:00.0"}, - &storage.NvmeController{PciAddr: "050505:01:00.0"}, - &storage.NvmeController{PciAddr: "050505:02:00.0"}, - }, - }, - lastEngineIdx: 0, - lastBdevCount: 4, - expLastEngineIdx: 1, - expLastBdevCount: 4, - }, - } { - t.Run(name, func(t *testing.T) { - log, buf := logging.NewTestLogger(name) - defer test.ShowBufferOnFailure(t, buf) - - engine := NewEngineInstance(log, - storage.DefaultProvider(log, int(tc.engineIdx), &tc.cfg.Storage), - nil, engine.NewRunner(log, &tc.cfg)) - engine.setIndex(tc.engineIdx) - - gotErr := setEngineBdevs(engine, tc.scanResp, &tc.lastEngineIdx, &tc.lastBdevCount) - test.CmpErr(t, tc.expErr, gotErr) - if tc.expErr != nil { - return - } - - test.AssertEqual(t, tc.expLastEngineIdx, tc.lastEngineIdx, "unexpected last engine index") - test.AssertEqual(t, tc.expLastBdevCount, tc.lastBdevCount, "unexpected last bdev count") - }) - } -} - func testFabricProviderSet(prov ...string) *hardware.FabricProviderSet { providers := []*hardware.FabricProvider{} for _, p := range prov { diff --git a/src/control/server/storage/bdev.go b/src/control/server/storage/bdev.go index 1bc1ebe981c1..8df50308c75b 100644 --- a/src/control/server/storage/bdev.go +++ b/src/control/server/storage/bdev.go @@ -500,9 +500,8 @@ type ( // BdevScanRequest defines the parameters for a Scan operation. BdevScanRequest struct { pbin.ForwardableRequest - DeviceList *BdevDeviceList - VMDEnabled bool - BypassCache bool + DeviceList *BdevDeviceList + VMDEnabled bool } // BdevScanResponse contains information gleaned during a successful Scan operation. diff --git a/src/control/server/storage/provider.go b/src/control/server/storage/provider.go index 135d94bcee59..3051512939e7 100644 --- a/src/control/server/storage/provider.go +++ b/src/control/server/storage/provider.go @@ -628,88 +628,6 @@ type BdevTierScanResult struct { Result *BdevScanResponse } -type scanFn func(BdevScanRequest) (*BdevScanResponse, error) - -func scanBdevTiers(log logging.Logger, vmdEnabled, direct bool, cfg *Config, cache *BdevScanResponse, scan scanFn) ([]BdevTierScanResult, error) { - if cfg == nil { - return nil, errors.New("nil storage config") - } - if cfg.Tiers == nil { - return nil, errors.New("nil storage config tiers") - } - - bdevs := cfg.GetBdevs() - if bdevs.Len() == 0 { - return nil, errors.New("scanBdevTiers should not be called if no bdevs in config") - } - - var bsr BdevScanResponse - scanOrCache := "scanned" - if direct { - req := BdevScanRequest{ - DeviceList: bdevs, - VMDEnabled: vmdEnabled, - } - resp, err := scan(req) - if err != nil { - return nil, err - } - bsr = *resp - } else { - if cache == nil { - cache = &BdevScanResponse{} - } - bsr = *cache - scanOrCache = "cached" - } - log.Debugf("bdevs in cfg: %s, %s: %+v", bdevs, scanOrCache, bsr) - - // Build slice of bdevs-per-tier from the entire scan response. - - bdevCfgs := cfg.Tiers.BdevConfigs() - results := make([]BdevTierScanResult, 0, len(bdevCfgs)) - resultBdevCount := 0 - for _, bc := range bdevCfgs { - if bc.Bdev.DeviceList.Len() == 0 { - continue - } - fbsr, err := filterBdevScanResponse(bc.Bdev.DeviceList, &bsr) - if err != nil { - return nil, errors.Wrapf(err, "filter scan cache for tier-%d", bc.Tier) - } - results = append(results, BdevTierScanResult{ - Tier: bc.Tier, Result: fbsr, - }) - - // Keep tally of total number of controllers added to results. - cpas, err := fbsr.Controllers.Addresses() - if err != nil { - return nil, errors.Wrap(err, "get controller pci addresses") - } - cpas, err = cpas.BackingToVMDAddresses() - if err != nil { - return nil, errors.Wrap(err, "convert backing device to vmd domain addresses") - } - resultBdevCount += cpas.Len() - } - - if resultBdevCount != bdevs.Len() { - log.Noticef("Unexpected scan results, wanted %d controllers got %d", bdevs.Len(), - resultBdevCount) - } - - return results, nil -} - -// ScanBdevTiers scans all Bdev tiers in the provider's engine storage configuration. -// If direct is set to true, bypass cache to retrieve up-to-date details. -func (p *Provider) ScanBdevTiers(direct bool) (results []BdevTierScanResult, err error) { - p.RLock() - defer p.RUnlock() - - return scanBdevTiers(p.log, p.vmdEnabled, direct, p.engineStorage, &p.bdevCache, p.bdev.Scan) -} - // ScanBdevs calls into bdev storage provider to scan SSDs, always bypassing cache. // Function should not be called when engines have been started and SSDs have been claimed by SPDK. func (p *Provider) ScanBdevs(req BdevScanRequest) (*BdevScanResponse, error) { @@ -720,33 +638,6 @@ func (p *Provider) ScanBdevs(req BdevScanRequest) (*BdevScanResponse, error) { return p.bdev.Scan(req) } -func (p *Provider) GetBdevCache() BdevScanResponse { - p.RLock() - defer p.RUnlock() - - return p.bdevCache -} - -// SetBdevCache stores given scan response in provider bdev cache. -func (p *Provider) SetBdevCache(resp BdevScanResponse) error { - p.Lock() - defer p.Unlock() - - // Enumerate scan results and filter out any controllers not specified in provider's engine - // storage config. - fResp, err := filterBdevScanResponse(p.engineStorage.GetBdevs(), &resp) - if err != nil { - return errors.Wrap(err, "filtering scan response before caching") - } - - p.log.Debugf("setting bdev cache in storage provider for engine %d: %v", p.engineIndex, - fResp.Controllers) - p.bdevCache = *fResp - p.vmdEnabled = resp.VMDEnabled - - return nil -} - // WithVMDEnabled enables VMD on storage provider. func (p *Provider) WithVMDEnabled() *Provider { p.vmdEnabled = true diff --git a/src/control/server/storage/provider_test.go b/src/control/server/storage/provider_test.go index 5ed2fb783d95..700671a5af31 100644 --- a/src/control/server/storage/provider_test.go +++ b/src/control/server/storage/provider_test.go @@ -7,9 +7,7 @@ package storage import ( - "fmt" "os" - "strings" "testing" "github.com/google/go-cmp/cmp" @@ -33,304 +31,304 @@ func defBdevCmpOpts() []cmp.Option { } } -func Test_scanBdevsTiers(t *testing.T) { - for name, tc := range map[string]struct { - direct bool - vmdEnabled bool - cfg *Config - cache *BdevScanResponse - scanResp *BdevScanResponse - scanErr error - expResults []BdevTierScanResult - expErr error - expNotice bool - }{ - "nil cfg": { - expErr: errors.New("nil storage config"), - }, - "nil cfg tiers": { - cfg: new(Config), - expErr: errors.New("nil storage config tiers"), - }, - "no bdev configs": { - cfg: &Config{ - Tiers: TierConfigs{mockScmTier}, - }, - expErr: errors.New("no bdevs in config"), - }, - "use cache; nil scan cache": { - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(3)), - }, - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: NvmeControllers{}, - }, - }, - }, - expNotice: true, - }, - "bypass cache; missing controller": { - direct: true, - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(3)), - }, - }, - cache: &BdevScanResponse{ - Controllers: MockNvmeControllers(3), - }, - scanResp: &BdevScanResponse{ - Controllers: MockNvmeControllers(2), - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: NvmeControllers{}, - }, - }, - }, - expNotice: true, - }, - "bypass cache": { - direct: true, - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2)), - }, - }, - cache: &BdevScanResponse{ - Controllers: MockNvmeControllers(2), - }, - scanResp: &BdevScanResponse{ - Controllers: MockNvmeControllers(3), - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: []*NvmeController{ - MockNvmeController(2), - }, - }, - }, - }, - }, - "bypass cache; scan error": { - direct: true, - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2)), - }, - }, - scanResp: &BdevScanResponse{ - Controllers: MockNvmeControllers(3), - }, - scanErr: errors.New("fail"), - expErr: errors.New("fail"), - }, - "use cache; missing controller": { - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2)), - }, - }, - cache: &BdevScanResponse{ - Controllers: MockNvmeControllers(2), - }, - scanResp: &BdevScanResponse{ - Controllers: MockNvmeControllers(3), - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: []*NvmeController{}, - }, - }, - }, - expNotice: true, - }, - "use cache": { - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2)), - }, - }, - cache: &BdevScanResponse{ - Controllers: MockNvmeControllers(3), - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: []*NvmeController{ - MockNvmeController(2), - }, - }, - }, - }, - }, - "bypass cache; multi-tier": { - direct: true, - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2), test.MockPCIAddr(3)), - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(4), test.MockPCIAddr(5)), - }, - }, - scanResp: &BdevScanResponse{ - Controllers: MockNvmeControllers(6), - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: []*NvmeController{ - MockNvmeController(2), MockNvmeController(3), - }, - }, - }, - { - Result: &BdevScanResponse{ - Controllers: []*NvmeController{ - MockNvmeController(4), MockNvmeController(5), - }, - }, - }, - }, - }, - "use cache; multi-tier": { - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(2), test.MockPCIAddr(3)), - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList(test.MockPCIAddr(4), test.MockPCIAddr(5)), - }, - }, - cache: &BdevScanResponse{ - Controllers: MockNvmeControllers(6), - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: []*NvmeController{ - MockNvmeController(2), MockNvmeController(3), - }, - }, - }, - { - Result: &BdevScanResponse{ - Controllers: []*NvmeController{ - MockNvmeController(4), MockNvmeController(5), - }, - }, - }, - }, - }, - "use cache; vmd domain missing in scan": { - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList("0000:62:00.5", "0000:63:00.5"), - }, - }, - cache: &BdevScanResponse{ - Controllers: NvmeControllers{ - &NvmeController{PciAddr: "620005:83:00.0"}, - &NvmeController{PciAddr: "620005:85:00.0"}, - &NvmeController{PciAddr: "620005:87:00.0"}, - &NvmeController{PciAddr: "620005:81:00.0"}, - }, - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: NvmeControllers{ - &NvmeController{PciAddr: "620005:83:00.0"}, - &NvmeController{PciAddr: "620005:85:00.0"}, - &NvmeController{PciAddr: "620005:87:00.0"}, - &NvmeController{PciAddr: "620005:81:00.0"}, - }, - }, - }, - }, - expNotice: true, - }, - "use cache; multiple devices behind vmd domain": { - cfg: &Config{ - Tiers: TierConfigs{ - mockScmTier, - NewTierConfig().WithStorageClass(ClassNvme.String()). - WithBdevDeviceList("0000:62:00.5"), - }, - }, - cache: &BdevScanResponse{ - Controllers: NvmeControllers{ - &NvmeController{PciAddr: "620005:83:00.0"}, - &NvmeController{PciAddr: "620005:85:00.0"}, - &NvmeController{PciAddr: "620005:87:00.0"}, - &NvmeController{PciAddr: "620005:81:00.0"}, - }, - }, - expResults: []BdevTierScanResult{ - { - Result: &BdevScanResponse{ - Controllers: NvmeControllers{ - &NvmeController{PciAddr: "620005:83:00.0"}, - &NvmeController{PciAddr: "620005:85:00.0"}, - &NvmeController{PciAddr: "620005:87:00.0"}, - &NvmeController{PciAddr: "620005:81:00.0"}, - }, - }, - }, - }, - }, - } { - t.Run(name, func(t *testing.T) { - log, buf := logging.NewTestLogger(name) - defer test.ShowBufferOnFailure(t, buf) - - scanFn := func(r BdevScanRequest) (*BdevScanResponse, error) { - return tc.scanResp, tc.scanErr - } - - gotResults, gotErr := scanBdevTiers(log, tc.vmdEnabled, tc.direct, tc.cfg, tc.cache, scanFn) - test.CmpErr(t, tc.expErr, gotErr) - if gotErr != nil { - return - } - - if diff := cmp.Diff(tc.expResults, gotResults, defBdevCmpOpts()...); diff != "" { - t.Fatalf("\nunexpected results (-want, +got):\n%s\n", diff) - } - - txtMod := "" - if !tc.expNotice { - txtMod = "not " - } - msg := fmt.Sprintf("expected NOTICE level message to %shave been logged", txtMod) - test.AssertEqual(t, tc.expNotice, strings.Contains(buf.String(), "NOTICE"), msg) - }) - } -} +//func Test_scanBdevsTiers(t *testing.T) { +// for name, tc := range map[string]struct { +// direct bool +// vmdEnabled bool +// cfg *Config +// cache *BdevScanResponse +// scanResp *BdevScanResponse +// scanErr error +// expResults []BdevTierScanResult +// expErr error +// expNotice bool +// }{ +// "nil cfg": { +// expErr: errors.New("nil storage config"), +// }, +// "nil cfg tiers": { +// cfg: new(Config), +// expErr: errors.New("nil storage config tiers"), +// }, +// "no bdev configs": { +// cfg: &Config{ +// Tiers: TierConfigs{mockScmTier}, +// }, +// expErr: errors.New("no bdevs in config"), +// }, +// "use cache; nil scan cache": { +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(3)), +// }, +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: NvmeControllers{}, +// }, +// }, +// }, +// expNotice: true, +// }, +// "bypass cache; missing controller": { +// direct: true, +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(3)), +// }, +// }, +// cache: &BdevScanResponse{ +// Controllers: MockNvmeControllers(3), +// }, +// scanResp: &BdevScanResponse{ +// Controllers: MockNvmeControllers(2), +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: NvmeControllers{}, +// }, +// }, +// }, +// expNotice: true, +// }, +// "bypass cache": { +// direct: true, +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(2)), +// }, +// }, +// cache: &BdevScanResponse{ +// Controllers: MockNvmeControllers(2), +// }, +// scanResp: &BdevScanResponse{ +// Controllers: MockNvmeControllers(3), +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: []*NvmeController{ +// MockNvmeController(2), +// }, +// }, +// }, +// }, +// }, +// "bypass cache; scan error": { +// direct: true, +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(2)), +// }, +// }, +// scanResp: &BdevScanResponse{ +// Controllers: MockNvmeControllers(3), +// }, +// scanErr: errors.New("fail"), +// expErr: errors.New("fail"), +// }, +// "use cache; missing controller": { +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(2)), +// }, +// }, +// cache: &BdevScanResponse{ +// Controllers: MockNvmeControllers(2), +// }, +// scanResp: &BdevScanResponse{ +// Controllers: MockNvmeControllers(3), +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: []*NvmeController{}, +// }, +// }, +// }, +// expNotice: true, +// }, +// "use cache": { +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(2)), +// }, +// }, +// cache: &BdevScanResponse{ +// Controllers: MockNvmeControllers(3), +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: []*NvmeController{ +// MockNvmeController(2), +// }, +// }, +// }, +// }, +// }, +// "bypass cache; multi-tier": { +// direct: true, +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(2), test.MockPCIAddr(3)), +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(4), test.MockPCIAddr(5)), +// }, +// }, +// scanResp: &BdevScanResponse{ +// Controllers: MockNvmeControllers(6), +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: []*NvmeController{ +// MockNvmeController(2), MockNvmeController(3), +// }, +// }, +// }, +// { +// Result: &BdevScanResponse{ +// Controllers: []*NvmeController{ +// MockNvmeController(4), MockNvmeController(5), +// }, +// }, +// }, +// }, +// }, +// "use cache; multi-tier": { +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(2), test.MockPCIAddr(3)), +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList(test.MockPCIAddr(4), test.MockPCIAddr(5)), +// }, +// }, +// cache: &BdevScanResponse{ +// Controllers: MockNvmeControllers(6), +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: []*NvmeController{ +// MockNvmeController(2), MockNvmeController(3), +// }, +// }, +// }, +// { +// Result: &BdevScanResponse{ +// Controllers: []*NvmeController{ +// MockNvmeController(4), MockNvmeController(5), +// }, +// }, +// }, +// }, +// }, +// "use cache; vmd domain missing in scan": { +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList("0000:62:00.5", "0000:63:00.5"), +// }, +// }, +// cache: &BdevScanResponse{ +// Controllers: NvmeControllers{ +// &NvmeController{PciAddr: "620005:83:00.0"}, +// &NvmeController{PciAddr: "620005:85:00.0"}, +// &NvmeController{PciAddr: "620005:87:00.0"}, +// &NvmeController{PciAddr: "620005:81:00.0"}, +// }, +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: NvmeControllers{ +// &NvmeController{PciAddr: "620005:83:00.0"}, +// &NvmeController{PciAddr: "620005:85:00.0"}, +// &NvmeController{PciAddr: "620005:87:00.0"}, +// &NvmeController{PciAddr: "620005:81:00.0"}, +// }, +// }, +// }, +// }, +// expNotice: true, +// }, +// "use cache; multiple devices behind vmd domain": { +// cfg: &Config{ +// Tiers: TierConfigs{ +// mockScmTier, +// NewTierConfig().WithStorageClass(ClassNvme.String()). +// WithBdevDeviceList("0000:62:00.5"), +// }, +// }, +// cache: &BdevScanResponse{ +// Controllers: NvmeControllers{ +// &NvmeController{PciAddr: "620005:83:00.0"}, +// &NvmeController{PciAddr: "620005:85:00.0"}, +// &NvmeController{PciAddr: "620005:87:00.0"}, +// &NvmeController{PciAddr: "620005:81:00.0"}, +// }, +// }, +// expResults: []BdevTierScanResult{ +// { +// Result: &BdevScanResponse{ +// Controllers: NvmeControllers{ +// &NvmeController{PciAddr: "620005:83:00.0"}, +// &NvmeController{PciAddr: "620005:85:00.0"}, +// &NvmeController{PciAddr: "620005:87:00.0"}, +// &NvmeController{PciAddr: "620005:81:00.0"}, +// }, +// }, +// }, +// }, +// }, +// } { +// t.Run(name, func(t *testing.T) { +// log, buf := logging.NewTestLogger(name) +// defer test.ShowBufferOnFailure(t, buf) +// +// scanFn := func(r BdevScanRequest) (*BdevScanResponse, error) { +// return tc.scanResp, tc.scanErr +// } +// +// gotResults, gotErr := scanBdevTiers(log, tc.vmdEnabled, tc.direct, tc.cfg, tc.cache, scanFn) +// test.CmpErr(t, tc.expErr, gotErr) +// if gotErr != nil { +// return +// } +// +// if diff := cmp.Diff(tc.expResults, gotResults, defBdevCmpOpts()...); diff != "" { +// t.Fatalf("\nunexpected results (-want, +got):\n%s\n", diff) +// } +// +// txtMod := "" +// if !tc.expNotice { +// txtMod = "not " +// } +// msg := fmt.Sprintf("expected NOTICE level message to %shave been logged", txtMod) +// test.AssertEqual(t, tc.expNotice, strings.Contains(buf.String(), "NOTICE"), msg) +// }) +// } +//} func Test_BdevWriteRequestFromConfig(t *testing.T) { hostname, err := os.Hostname() diff --git a/src/control/server/util_test.go b/src/control/server/util_test.go index f34c6d16f669..67037a172536 100644 --- a/src/control/server/util_test.go +++ b/src/control/server/util_test.go @@ -200,14 +200,14 @@ func newTestEngine(log logging.Logger, isAP bool, provider *storage.Provider, en rCfg.Running.SetTrue() r := engine.NewTestRunner(rCfg, engineCfg[0]) - srv := NewEngineInstance(log, provider, nil, r) - srv.setSuperblock(&Superblock{ + e := NewEngineInstance(log, provider, nil, r) + e.setSuperblock(&Superblock{ Rank: ranklist.NewRankPtr(0), }) - srv.ready.SetTrue() - srv.OnReady() + e.ready.SetTrue() + e.OnReady() - return srv + return e } // mockTCPResolver returns successful resolve results for any input. diff --git a/src/include/daos/drpc_modules.h b/src/include/daos/drpc_modules.h index 69aaf568673c..7fab6b09a5f0 100644 --- a/src/include/daos/drpc_modules.h +++ b/src/include/daos/drpc_modules.h @@ -33,43 +33,44 @@ enum drpc_sec_agent_method { }; enum drpc_mgmt_method { - DRPC_METHOD_MGMT_KILL_RANK = 201, - DRPC_METHOD_MGMT_SET_RANK = 202, - DRPC_METHOD_MGMT_GET_ATTACH_INFO = 206, - DRPC_METHOD_MGMT_POOL_CREATE = 207, - DRPC_METHOD_MGMT_POOL_DESTROY = 208, - DRPC_METHOD_MGMT_SET_UP = 209, - DRPC_METHOD_MGMT_BIO_HEALTH_QUERY = 210, - DRPC_METHOD_MGMT_SMD_LIST_DEVS = 211, - DRPC_METHOD_MGMT_SMD_LIST_POOLS = 212, - DRPC_METHOD_MGMT_POOL_GET_ACL = 213, - DRPC_METHOD_MGMT_POOL_OVERWRITE_ACL = 215, - DRPC_METHOD_MGMT_POOL_UPDATE_ACL = 216, - DRPC_METHOD_MGMT_POOL_DELETE_ACL = 217, - DRPC_METHOD_MGMT_PREP_SHUTDOWN = 218, - DRPC_METHOD_MGMT_DEV_SET_FAULTY = 220, - DRPC_METHOD_MGMT_DEV_REPLACE = 221, - DRPC_METHOD_MGMT_LIST_CONTAINERS = 222, - DRPC_METHOD_MGMT_POOL_QUERY = 223, - DRPC_METHOD_MGMT_POOL_SET_PROP = 224, - DRPC_METHOD_MGMT_PING_RANK = 225, - DRPC_METHOD_MGMT_REINTEGRATE = 226, - DRPC_METHOD_MGMT_CONT_SET_OWNER = 227, - DRPC_METHOD_MGMT_EXCLUDE = 228, - DRPC_METHOD_MGMT_EXTEND = 229, - DRPC_METHOD_MGMT_POOL_EVICT = 230, - DRPC_METHOD_MGMT_DRAIN = 231, - DRPC_METHOD_MGMT_GROUP_UPDATE = 232, - DRPC_METHOD_MGMT_NOTIFY_EXIT = 233, - DRPC_METHOD_MGMT_NOTIFY_POOL_CONNECT = 235, - DRPC_METHOD_MGMT_NOTIFY_POOL_DISCONNECT = 236, - DRPC_METHOD_MGMT_POOL_GET_PROP = 237, - DRPC_METHOD_MGMT_SET_LOG_MASKS = 238, - DRPC_METHOD_MGMT_POOL_UPGRADE = 239, - DRPC_METHOD_MGMT_POOL_QUERY_TARGETS = 240, - DRPC_METHOD_MGMT_LED_MANAGE = 241, + DRPC_METHOD_MGMT_KILL_RANK = 201, + DRPC_METHOD_MGMT_SET_RANK = 202, + DRPC_METHOD_MGMT_GET_ATTACH_INFO = 206, + DRPC_METHOD_MGMT_POOL_CREATE = 207, + DRPC_METHOD_MGMT_POOL_DESTROY = 208, + DRPC_METHOD_MGMT_SET_UP = 209, + DRPC_METHOD_MGMT_BIO_HEALTH_QUERY = 210, + DRPC_METHOD_MGMT_SMD_LIST_DEVS = 211, + DRPC_METHOD_MGMT_SMD_LIST_POOLS = 212, + DRPC_METHOD_MGMT_POOL_GET_ACL = 213, + DRPC_METHOD_MGMT_POOL_OVERWRITE_ACL = 215, + DRPC_METHOD_MGMT_POOL_UPDATE_ACL = 216, + DRPC_METHOD_MGMT_POOL_DELETE_ACL = 217, + DRPC_METHOD_MGMT_PREP_SHUTDOWN = 218, + DRPC_METHOD_MGMT_DEV_SET_FAULTY = 220, + DRPC_METHOD_MGMT_DEV_REPLACE = 221, + DRPC_METHOD_MGMT_LIST_CONTAINERS = 222, + DRPC_METHOD_MGMT_POOL_QUERY = 223, + DRPC_METHOD_MGMT_POOL_SET_PROP = 224, + DRPC_METHOD_MGMT_PING_RANK = 225, + DRPC_METHOD_MGMT_REINTEGRATE = 226, + DRPC_METHOD_MGMT_CONT_SET_OWNER = 227, + DRPC_METHOD_MGMT_EXCLUDE = 228, + DRPC_METHOD_MGMT_EXTEND = 229, + DRPC_METHOD_MGMT_POOL_EVICT = 230, + DRPC_METHOD_MGMT_DRAIN = 231, + DRPC_METHOD_MGMT_GROUP_UPDATE = 232, + DRPC_METHOD_MGMT_NOTIFY_EXIT = 233, + DRPC_METHOD_MGMT_NOTIFY_POOL_CONNECT = 235, + DRPC_METHOD_MGMT_NOTIFY_POOL_DISCONNECT = 236, + DRPC_METHOD_MGMT_POOL_GET_PROP = 237, + DRPC_METHOD_MGMT_SET_LOG_MASKS = 238, + DRPC_METHOD_MGMT_POOL_UPGRADE = 239, + DRPC_METHOD_MGMT_POOL_QUERY_TARGETS = 240, + DRPC_METHOD_MGMT_LED_MANAGE = 241, + DRPC_METHOD_MGMT_NVME_LIST_DEVS = 242, - NUM_DRPC_MGMT_METHODS /* Must be last */ + NUM_DRPC_MGMT_METHODS /* Must be last */ }; enum drpc_srv_method {