Skip to content

Commit

Permalink
DAOS-15739 engine: Add single-engine, multi-socket support (#14311)
Browse files Browse the repository at this point in the history
Backport for the following patches
DAOS-13380 engine: refine tgt_nr check (#12405)
DAOS-15739 engine: Add multi-socket support (#14234)
DAOS-623 engine: Fix a typo (#14329)

* DAOS-13380 engine: refine tgt_nr check

1. In the non-DAOS_TARGET_OVERSUBSCRIBE case,
   fail to start the engine if there are not enough cores.
2. In the DAOS_TARGET_OVERSUBSCRIBE case,
   allow the engine to be force-started.
The #nr_xs_helpers may be reduced in either case.

* DAOS-15739 engine: Add multi-socket support (#14234)

Add a simple multi-socket mode for use cases where a single
engine must be used. Avoids the issue of having all helper
xstreams automatically assigned to a single NUMA node thus
increasing efficiency of synchronizations between I/O and
helper xstreams.

It is the default behavior if all of the following are true:

- Neither pinned_numa_node nor first_core is used.
- No oversubscription is requested.
- All NUMA nodes have the same number of cores.
- Targets and helpers divide evenly among NUMA nodes.
- There is more than one NUMA node.

Update server config logic to ensure first_core is passed
on to the engine if it's set, while keeping existing behavior
when both first_core: 0 and pinned_numa_node are set.

Signed-off-by: Jeff Olivier <[email protected]>
Signed-off-by: Xuezhao Liu <[email protected]>
Signed-off-by: Tom Nabarro <[email protected]>
  • Loading branch information
jolivier23 authored May 8, 2024
1 parent d438ace commit f16a7dd
Show file tree
Hide file tree
Showing 23 changed files with 363 additions and 183 deletions.
6 changes: 6 additions & 0 deletions docs/admin/deployment.md
Original file line number Diff line number Diff line change
Expand Up @@ -1377,6 +1377,12 @@ per four target threads, for example `targets: 16` and `nr_xs_helpers: 4`.
The server should have sufficiently many physical cores to support the
number of targets plus the additional service threads.

The 'targets:' and 'nr_xs_helpers:' requirements are mandatory. If the number
of physical cores is insufficient, the DAOS engine will fail to start (note
that 2 cores are reserved for system services). Alternatively, set the
environment variable "DAOS_TARGET_OVERSUBSCRIBE=1" to force the DAOS engine to
start (this may hurt performance, as multiple XS compete for the same core).


## Storage Formatting

Expand Down
1 change: 1 addition & 0 deletions docs/admin/env_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Environment variables in this section only apply to the server side.
|DAOS\_DTX\_AGG\_THD\_AGE|DTX aggregation age threshold in seconds. The valid range is [210, 1830]. The default value is 630.|
|DAOS\_DTX\_RPC\_HELPER\_THD|DTX RPC helper threshold. The valid range is [18, unlimited). The default value is 513.|
|DAOS\_DTX\_BATCHED\_ULT\_MAX|The max count of DTX batched commit ULTs. The valid range is [0, unlimited). 0 means to commit DTX synchronously. The default value is 32.|
|DAOS\_FORWARD\_NEIGHBOR|Set to enable I/O forwarding on neighbor xstream in the absence of helper threads.|

## Server and Client environment variables

Expand Down
8 changes: 4 additions & 4 deletions src/control/cmd/daos_server/start.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2019-2023 Intel Corporation.
// (C) Copyright 2019-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -29,7 +29,7 @@ type startCmd struct {
Modules *string `short:"m" long:"modules" description:"List of server modules to load"`
Targets uint16 `short:"t" long:"targets" description:"Number of targets to use (default use all cores)"`
NrXsHelpers *uint16 `short:"x" long:"xshelpernr" description:"Number of helper XS per VOS target"`
FirstCore uint16 `short:"f" long:"firstcore" default:"0" description:"Index of first core for service thread"`
FirstCore *uint16 `short:"f" long:"firstcore" description:"Index of first core for service thread"`
Group string `short:"g" long:"group" description:"Server group name"`
SocketDir string `short:"d" long:"socket_dir" description:"Location for all daos_server & daos_engine sockets"`
Insecure bool `short:"i" long:"insecure" description:"Allow for insecure connections"`
Expand Down Expand Up @@ -76,8 +76,8 @@ func (cmd *startCmd) setCLIOverrides() error {
if cmd.NrXsHelpers != nil {
srv.WithHelperStreamCount(int(*cmd.NrXsHelpers))
}
if cmd.FirstCore > 0 {
srv.WithServiceThreadCore(int(cmd.FirstCore))
if cmd.FirstCore != nil {
srv.WithServiceThreadCore(int(*cmd.FirstCore))
}
}

Expand Down
2 changes: 0 additions & 2 deletions src/control/cmd/dmg/auto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,6 @@ transport_config:
engines:
- targets: 12
nr_xs_helpers: 2
first_core: 0
log_file: /tmp/daos_engine.0.log
storage:
- class: dcpm
Expand All @@ -599,7 +598,6 @@ engines:
pinned_numa_node: 0
- targets: 6
nr_xs_helpers: 0
first_core: 0
log_file: /tmp/daos_engine.1.log
storage:
- class: dcpm
Expand Down
14 changes: 12 additions & 2 deletions src/control/server/config/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,11 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA
// Detect legacy mode by checking if first_core is being used.
legacyMode := false
for _, engineCfg := range cfg.Engines {
if engineCfg.ServiceThreadCore != 0 {
if engineCfg.ServiceThreadCore != nil {
if *engineCfg.ServiceThreadCore == 0 && engineCfg.PinnedNumaNode != nil {
// Both are set but we don't know yet which to use
continue
}
legacyMode = true
break
}
Expand All @@ -872,9 +876,15 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA
// Fail if any engine has an explicit pin and non-zero first_core.
for idx, engineCfg := range cfg.Engines {
if legacyMode {
if engineCfg.PinnedNumaNode != nil {
log.Infof("pinned_numa_node setting ignored on engine %d", idx)
engineCfg.PinnedNumaNode = nil
}
log.Debugf("setting legacy core allocation algorithm on engine %d", idx)
engineCfg.PinnedNumaNode = nil
continue
} else if engineCfg.ServiceThreadCore != nil {
log.Infof("first_core setting ignored on engine %d", idx)
engineCfg.ServiceThreadCore = nil
}

numaAffinity, err := detectEngineAffinity(log, engineCfg, affSources...)
Expand Down
6 changes: 4 additions & 2 deletions src/control/server/ctl_storage_rpc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1569,7 +1569,10 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) {
var engineCfgs []*engine.Config
for i, sc := range tc.storageCfgs {
log.Debugf("storage cfg contains bdevs %v for engine %d", sc.Bdevs(), i)
engineCfgs = append(engineCfgs, engine.MockConfig().WithStorage(sc...))
engineCfgs = append(engineCfgs,
engine.MockConfig().
WithStorage(sc...).
WithTargetCount(tc.engineTargetCount[i]))
}
sCfg := config.DefaultServer().WithEngines(engineCfgs...)
cs := mockControlService(t, log, sCfg, csbmbc, tc.smbc, tc.smsc)
Expand Down Expand Up @@ -1625,7 +1628,6 @@ func TestServer_CtlSvc_StorageScan_PostEngineStart(t *testing.T) {
}
te.setDrpcClient(newMockDrpcClient(dcc))
te._superblock.Rank = ranklist.NewRankPtr(uint32(idx + 1))
te.setTargetCount(tc.engineTargetCount[idx])
for _, tc := range te.storage.GetBdevConfigs() {
tc.Bdev.DeviceRoles.OptionBits = storage.OptionBits(storage.BdevRoleAll)
}
Expand Down
27 changes: 23 additions & 4 deletions src/control/server/engine/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ type Config struct {
Modules string `yaml:"modules,omitempty" cmdLongFlag:"--modules" cmdShortFlag:"-m"`
TargetCount int `yaml:"targets,omitempty" cmdLongFlag:"--targets,nonzero" cmdShortFlag:"-t,nonzero"`
HelperStreamCount int `yaml:"nr_xs_helpers" cmdLongFlag:"--xshelpernr" cmdShortFlag:"-x"`
ServiceThreadCore int `yaml:"first_core" cmdLongFlag:"--firstcore,nonzero" cmdShortFlag:"-f,nonzero"`
ServiceThreadCore *int `yaml:"first_core,omitempty" cmdLongFlag:"--firstcore" cmdShortFlag:"-f"`
SystemName string `yaml:"-" cmdLongFlag:"--group" cmdShortFlag:"-g"`
SocketDir string `yaml:"-" cmdLongFlag:"--socket_dir" cmdShortFlag:"-d"`
LogMask string `yaml:"log_mask,omitempty" cmdEnv:"D_LOG_MASK"`
Expand Down Expand Up @@ -160,10 +160,29 @@ func (c *Config) ReadLogSubsystems() (string, error) {

// Validate ensures that the configuration meets minimum standards.
func (c *Config) Validate() error {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != 0 {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != nil && *c.ServiceThreadCore != 0 {
return errors.New("cannot specify both pinned_numa_node and first_core")
}

errNegative := func(s string) error {
return errors.Errorf("%s must not be negative", s)
}
if c.TargetCount < 0 {
return errNegative("target count")
}
if c.HelperStreamCount < 0 {
return errNegative("helper stream count")
}
if c.ServiceThreadCore != nil && *c.ServiceThreadCore < 0 {
return errNegative("service thread core index")
}
if c.MemSize < 0 {
return errNegative("mem size")
}
if c.HugepageSz < 0 {
return errNegative("hugepage size")
}

if c.TargetCount == 0 {
return errors.New("target count must be nonzero")
}
Expand Down Expand Up @@ -222,7 +241,7 @@ func IsNUMAMismatch(err error) bool {
// SetNUMAAffinity sets the NUMA affinity for the engine,
// if not already set in the configuration.
func (c *Config) SetNUMAAffinity(node uint) error {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != 0 {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != nil && *c.ServiceThreadCore != 0 {
return errors.New("cannot set both NUMA node and service core")
}

Expand Down Expand Up @@ -464,7 +483,7 @@ func (c *Config) WithHelperStreamCount(count int) *Config {

// WithServiceThreadCore sets the core index to be used for running DAOS service threads.
func (c *Config) WithServiceThreadCore(idx int) *Config {
c.ServiceThreadCore = idx
c.ServiceThreadCore = &idx
return c
}

Expand Down
8 changes: 0 additions & 8 deletions src/control/server/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,14 +338,6 @@ func (ei *EngineInstance) setHugepageSz(hpSizeMb int) {
ei.runner.GetConfig().HugepageSz = hpSizeMb
}

// setTargetCount updates target count in engine config.
func (ei *EngineInstance) setTargetCount(numTargets int) {
ei.Lock()
defer ei.Unlock()

ei.runner.GetConfig().TargetCount = numTargets
}

// GetTargetCount returns the target count set for this instance.
func (ei *EngineInstance) GetTargetCount() int {
ei.RLock()
Expand Down
6 changes: 0 additions & 6 deletions src/control/server/instance_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,6 @@ func (ei *EngineInstance) finishStartup(ctx context.Context, ready *srvpb.Notify
if err := ei.handleReady(ctx, ready); err != nil {
return err
}
// update engine target count to reflect allocated number of targets, not number requested
// when starting
// NOTE: Engine mem_size passed on engine invocation is based on the number of targets
// requested in config so if number of targets allocated doesn't match the number of
// targets requested the mem_size value may be inappropriate.
ei.setTargetCount(int(ready.GetNtgts()))

ei.ready.SetTrue()

Expand Down
Loading

0 comments on commit f16a7dd

Please sign in to comment.