Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-15739 engine: Add multi-socket support #14234

Merged
merged 22 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c198e6d
DAOS-15739 engine: Add multi-socket support
jolivier23 Apr 22, 2024
b706c29
minor fix
jolivier23 Apr 23, 2024
610a9a8
Remove DAOS_MULTISOCKET environment variable
jolivier23 Apr 24, 2024
4b1730c
Add DAOS_FORWARD_SELF
jolivier23 Apr 24, 2024
e153977
Merge branch 'master' into jvolivie/add_multisocket
jolivier23 Apr 24, 2024
e86d14d
Skip-build-ubuntu20-rpm: true
jolivier23 Apr 24, 2024
34459d0
Merge branch 'master' into jvolivie/add_multisocket
jolivier23 Apr 25, 2024
89a7e24
Fix a bug with dss_core_offset
jolivier23 Apr 25, 2024
00fcc86
Merge branch 'master' into jvolivie/add_multisocket
jolivier23 Apr 30, 2024
e854c9f
Merge branch 'master' into jvolivie/add_multisocket
jolivier23 May 1, 2024
62d6591
Fix first_core handling in control plane so it
jolivier23 May 1, 2024
043c5dc
Avoid invalid assertion
jolivier23 May 1, 2024
e3e3a5a
autoconfig shouldn't be setting both pinned_numa_node and first_core
jolivier23 May 1, 2024
5e6a225
Fix up some configs to avoid setting first_core
jolivier23 May 1, 2024
a2cdd98
Revert "Fix up some configs to avoid setting first_core"
jolivier23 May 1, 2024
9dfe48f
Revert "autoconfig shouldn't be setting both pinned_numa_node and fir…
jolivier23 May 1, 2024
b61c48e
Allow first_core: 0 to be set with pinned_numa_node
jolivier23 May 1, 2024
ef9c4a5
Features: control
jolivier23 May 1, 2024
b8420f9
Print which setting is superfluous
jolivier23 May 1, 2024
a0a86df
Set first core to nil
jolivier23 May 1, 2024
e0ccb53
Add one comment
jolivier23 May 1, 2024
33c7d85
Features: control
jolivier23 May 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/admin/env_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Environment variables in this section only apply to the server side.
|DAOS\_DTX\_AGG\_THD\_AGE|DTX aggregation age threshold in seconds. The valid range is [210, 1830]. The default value is 630.|
|DAOS\_DTX\_RPC\_HELPER\_THD|DTX RPC helper threshold. The valid range is [18, unlimited). The default value is 513.|
|DAOS\_DTX\_BATCHED\_ULT\_MAX|The max count of DTX batched commit ULTs. The valid range is [0, unlimited). 0 means to commit DTX synchronously. The default value is 32.|
|DAOS\_FORWARD\_NEIGHBOR|Set to enable I/O forwarding on neighbor xstream in the absence of helper threads.|

## Server and Client environment variables

Expand Down
8 changes: 4 additions & 4 deletions src/control/cmd/daos_server/start.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// (C) Copyright 2019-2023 Intel Corporation.
// (C) Copyright 2019-2024 Intel Corporation.
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand All @@ -26,7 +26,7 @@ type startCmd struct {
Modules *string `short:"m" long:"modules" description:"List of server modules to load"`
Targets uint16 `short:"t" long:"targets" description:"Number of targets to use (default use all cores)"`
NrXsHelpers *uint16 `short:"x" long:"xshelpernr" description:"Number of helper XS per VOS target"`
FirstCore uint16 `short:"f" long:"firstcore" default:"0" description:"Index of first core for service thread"`
FirstCore *uint16 `short:"f" long:"firstcore" description:"Index of first core for service thread"`
Group string `short:"g" long:"group" description:"Server group name"`
SocketDir string `short:"d" long:"socket_dir" description:"Location for all daos_server & daos_engine sockets"`
Insecure bool `short:"i" long:"insecure" description:"Allow for insecure connections"`
Expand Down Expand Up @@ -73,8 +73,8 @@ func (cmd *startCmd) setCLIOverrides() error {
if cmd.NrXsHelpers != nil {
srv.WithHelperStreamCount(int(*cmd.NrXsHelpers))
}
if cmd.FirstCore > 0 {
srv.WithServiceThreadCore(int(cmd.FirstCore))
if cmd.FirstCore != nil {
srv.WithServiceThreadCore(int(*cmd.FirstCore))
}
}

Expand Down
2 changes: 0 additions & 2 deletions src/control/cmd/dmg/auto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,6 @@ transport_config:
engines:
- targets: 12
nr_xs_helpers: 2
first_core: 0
log_file: /tmp/daos_engine.0.log
storage:
- class: dcpm
Expand All @@ -599,7 +598,6 @@ engines:
pinned_numa_node: 0
- targets: 6
nr_xs_helpers: 0
first_core: 0
log_file: /tmp/daos_engine.1.log
storage:
- class: dcpm
Expand Down
4 changes: 0 additions & 4 deletions src/control/lib/support/log_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,6 @@ transport_config:
engines:
- targets: 12
nr_xs_helpers: 2
first_core: 0
log_file: ` + engineLog0 + `
storage:
- class: dcpm
Expand All @@ -666,7 +665,6 @@ engines:
pinned_numa_node: 0
- targets: 6
nr_xs_helpers: 0
first_core: 0
log_file: ` + engineLog1 + `
storage:
- class: dcpm
Expand Down Expand Up @@ -710,7 +708,6 @@ transport_config:
engines:
- targets: 12
nr_xs_helpers: 2
first_core: 0
log_file: ` + targetTestDir + ` /dir1/invalid_engine0.log
storage:
- class: dcpm
Expand All @@ -729,7 +726,6 @@ engines:
pinned_numa_node: 0
- targets: 6
nr_xs_helpers: 0
first_core: 0
log_file: ` + targetTestDir + ` /dir1/invalid_engine1.log
storage:
- class: dcpm
Expand Down
2 changes: 1 addition & 1 deletion src/control/server/config/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA
// Detect legacy mode by checking if first_core is being used.
legacyMode := false
for _, engineCfg := range cfg.Engines {
if engineCfg.ServiceThreadCore != 0 {
if engineCfg.ServiceThreadCore != nil {
legacyMode = true
break
}
Expand Down
10 changes: 5 additions & 5 deletions src/control/server/engine/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ type Config struct {
Modules string `yaml:"modules,omitempty" cmdLongFlag:"--modules" cmdShortFlag:"-m"`
TargetCount int `yaml:"targets,omitempty" cmdLongFlag:"--targets,nonzero" cmdShortFlag:"-t,nonzero"`
HelperStreamCount int `yaml:"nr_xs_helpers" cmdLongFlag:"--xshelpernr" cmdShortFlag:"-x"`
ServiceThreadCore int `yaml:"first_core" cmdLongFlag:"--firstcore,nonzero" cmdShortFlag:"-f,nonzero"`
ServiceThreadCore *int `yaml:"first_core,omitempty" cmdLongFlag:"--firstcore" cmdShortFlag:"-f"`
SystemName string `yaml:"-" cmdLongFlag:"--group" cmdShortFlag:"-g"`
SocketDir string `yaml:"-" cmdLongFlag:"--socket_dir" cmdShortFlag:"-d"`
LogMask string `yaml:"log_mask,omitempty" cmdEnv:"D_LOG_MASK"`
Expand Down Expand Up @@ -289,7 +289,7 @@ func (c *Config) ReadLogSubsystems() (string, error) {

// Validate ensures that the configuration meets minimum standards.
func (c *Config) Validate() error {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != 0 {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != nil {
return errors.New("cannot specify both pinned_numa_node and first_core")
}

Expand All @@ -302,7 +302,7 @@ func (c *Config) Validate() error {
if c.HelperStreamCount < 0 {
return errNegative("helper stream count")
}
if c.ServiceThreadCore < 0 {
if c.ServiceThreadCore != nil && *c.ServiceThreadCore < 0 {
return errNegative("service thread core index")
}
if c.MemSize < 0 {
Expand Down Expand Up @@ -370,7 +370,7 @@ func IsNUMAMismatch(err error) bool {
// SetNUMAAffinity sets the NUMA affinity for the engine,
// if not already set in the configuration.
func (c *Config) SetNUMAAffinity(node uint) error {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != 0 {
if c.PinnedNumaNode != nil && c.ServiceThreadCore != nil {
return errors.New("cannot set both NUMA node and service core")
}

Expand Down Expand Up @@ -612,7 +612,7 @@ func (c *Config) WithHelperStreamCount(count int) *Config {

// WithServiceThreadCore sets the core index to be used for running DAOS service threads.
func (c *Config) WithServiceThreadCore(idx int) *Config {
c.ServiceThreadCore = idx
c.ServiceThreadCore = &idx
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While we are changing things, it might make sense to change ServiceThreadCore to *uint.

return c
}

Expand Down
Loading
Loading