Skip to content

Commit

Permalink
DAOS-16650 control: dmg system exclude, update group version (#15288)
Browse files Browse the repository at this point in the history
With this change, when a DAOS administrator runs `dmg system exclude`
for a given set of engines, the system map version / CaRT primary group
version is updated. In turn, the daos_engine processes detect the
"loss" of the administratively excluded engines more promptly, update
pool maps, and perform rebuild. This change supports the use case of
proactively excluding ranks that are expected to be affected by
planned maintenance that would cut off connectivity to certain
engines.

Signed-off-by: Kenneth Cain <[email protected]>
  • Loading branch information
kccain authored Oct 16, 2024
1 parent 7c93b69 commit 0e42449
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/control/server/mgmt_system.go
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,8 @@ func (svc *mgmtSvc) SystemExclude(ctx context.Context, req *mgmtpb.SystemExclude
})
}

svc.reqGroupUpdate(ctx, false)

return resp, nil
}

Expand Down
21 changes: 21 additions & 0 deletions src/control/server/mgmt_system_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1765,6 +1765,7 @@ func TestServer_MgmtSvc_SystemExclude(t *testing.T) {
mockMember(t, 3, 2, "joined"),
},
},

"unexclude hosts": {
req: &mgmtpb.SystemExcludeReq{Hosts: test.MockHostAddr(1).String(), Clear: true},
members: system.Members{
Expand Down Expand Up @@ -1795,12 +1796,32 @@ func TestServer_MgmtSvc_SystemExclude(t *testing.T) {
if tc.req != nil && tc.req.Sys == "" {
tc.req.Sys = build.DefaultSystemName
}

startMapVer, err := svc.sysdb.CurMapVersion()
if err != nil {
t.Fatalf("startMapVer CurMapVersion() failed\n")
return
}
gotResp, gotAPIErr := svc.SystemExclude(ctx, tc.req)
test.CmpErr(t, tc.expAPIErr, gotAPIErr)
if tc.expAPIErr != nil {
return
}

// Wait for the (asynchronous) update to increase the system map version.
// The loop is deliberately unbounded: if the version never increases, the test times out.
for {
curMapVer, err := svc.sysdb.CurMapVersion()
if err != nil {
t.Fatalf("CurMapVersion() failed\n")
return
}

if curMapVer > startMapVer {
break
}
}

checkRankResults(t, tc.expResults, gotResp.Results)
checkMembers(t, tc.expMembers, svc.membership)
})
Expand Down

0 comments on commit 0e42449

Please sign in to comment.