Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client: refactor cpuset manager initialization #14230

Merged
merged 1 commit into from
Aug 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/14230.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
client: Fixed a bug where cpuset initialization would not work on first agent startup
```
24 changes: 3 additions & 21 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"net/rpc"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -395,7 +394,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie
invalidAllocs: make(map[string]struct{}),
serversContactedCh: make(chan struct{}),
serversContactedOnce: sync.Once{},
cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, logger),
cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger),
getter: getter.NewGetter(cfg.Artifact),
EnterpriseClient: newEnterpriseClient(logger),
}
Expand Down Expand Up @@ -689,25 +688,8 @@ func (c *Client) init() error {
"reserved", reserved,
)

// Ensure cgroups are created on linux platform
if runtime.GOOS == "linux" && c.cpusetManager != nil {
// use the client configuration for reservable_cores if set
cores := conf.ReservableCores
if len(cores) == 0 {
// otherwise lookup the effective cores from the parent cgroup
cores, err = cgutil.GetCPUsFromCgroup(conf.CgroupParent)
if err != nil {
c.logger.Warn("failed to lookup cpuset from cgroup parent, and not set as reservable_cores", "parent", conf.CgroupParent)
// will continue with a disabled cpuset manager
}
}
if cpuErr := c.cpusetManager.Init(cores); cpuErr != nil {
// If the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager
// will be disabled. this is common when running in dev mode under a non-root user for example.
c.logger.Warn("failed to initialize cpuset cgroup subsystem, cpuset management disabled", "error", cpuErr)
c.cpusetManager = new(cgutil.NoopCpusetManager)
}
}
// startup the CPUSet manager
c.cpusetManager.Init()

// setup the check store
c.checkStore = checkstore.NewStore(c.logger, c.stateDB)
Expand Down
6 changes: 4 additions & 2 deletions client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/client/lib/cgutil"
regMock "github.com/hashicorp/nomad/client/serviceregistration/mock"
cstate "github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/command/agent/consul"
Expand Down Expand Up @@ -736,8 +737,9 @@ func TestClient_Init(t *testing.T) {
config.Node = mock.Node()

client := &Client{
config: config,
logger: testlog.HCLogger(t),
config: config,
logger: testlog.HCLogger(t),
cpusetManager: new(cgutil.NoopCpusetManager),
}

if err := client.init(); err != nil {
Expand Down
20 changes: 13 additions & 7 deletions client/lib/cgutil/cgutil_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,25 @@ var UseV2 = cgroups.IsCgroup2UnifiedMode()
// will create cgroups. If parent is not set, an appropriate name for the version
// of cgroups will be used.
func GetCgroupParent(parent string) string {
if UseV2 {
return getParentV2(parent)
switch {
case parent != "":
return parent
case UseV2:
return DefaultCgroupParentV2
default:
return DefaultCgroupV1Parent
}
return getParentV1(parent)
}

// CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration.
func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager {
func CreateCPUSetManager(parent string, reservable []uint16, logger hclog.Logger) CpusetManager {
parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config
if UseV2 {
return NewCpusetManagerV2(parent, logger.Named("cpuset.v2"))
switch {
case UseV2:
return NewCpusetManagerV2(parent, reservable, logger.Named("cpuset.v2"))
default:
return NewCpusetManagerV1(parent, reservable, logger.Named("cpuset.v1"))
}
return NewCpusetManagerV1(parent, logger.Named("cpuset.v1"))
}

// GetCPUsFromCgroup gets the effective cpuset value for the given cgroup.
Expand Down
19 changes: 11 additions & 8 deletions client/lib/cgutil/cgutil_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/hashicorp/nomad/helper/uuid"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -58,19 +59,21 @@ func TestUtil_CreateCPUSetManager(t *testing.T) {
t.Run("v1", func(t *testing.T) {
testutil.CgroupsCompatibleV1(t)
parent := "/" + uuid.Short()
manager := CreateCPUSetManager(parent, logger)
err := manager.Init([]uint16{0})
require.NoError(t, err)
require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
manager := CreateCPUSetManager(parent, []uint16{0}, logger)
manager.Init()
_, ok := manager.(*cpusetManagerV1)
must.True(t, ok)
must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
})

t.Run("v2", func(t *testing.T) {
testutil.CgroupsCompatibleV2(t)
parent := uuid.Short() + ".slice"
manager := CreateCPUSetManager(parent, logger)
err := manager.Init([]uint16{0})
require.NoError(t, err)
require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
manager := CreateCPUSetManager(parent, []uint16{0}, logger)
manager.Init()
_, ok := manager.(*cpusetManagerV2)
must.True(t, ok)
must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent)))
})
}

Expand Down
2 changes: 1 addition & 1 deletion client/lib/cgutil/cgutil_noop.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const (
var UseV2 = false

// CreateCPUSetManager creates a no-op CpusetManager for non-Linux operating systems.
func CreateCPUSetManager(string, hclog.Logger) CpusetManager {
func CreateCPUSetManager(string, []uint16, hclog.Logger) CpusetManager {
return new(NoopCpusetManager)
}

Expand Down
10 changes: 4 additions & 6 deletions client/lib/cgutil/cpuset_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@ const (

// CpusetManager is used to setup cpuset cgroups for each task.
type CpusetManager interface {
// Init should be called with the initial set of reservable cores before any
// allocations are managed. Ensures the parent cgroup exists and proper permissions
// are available for managing cgroups.
Init([]uint16) error
// Init should be called before the client starts running allocations. This
// is where the cpuset manager should start doing background operations.
Init()

// AddAlloc adds an allocation to the manager
AddAlloc(alloc *structs.Allocation)
Expand All @@ -36,8 +35,7 @@ type CpusetManager interface {

type NoopCpusetManager struct{}

func (n NoopCpusetManager) Init([]uint16) error {
return nil
func (n NoopCpusetManager) Init() {
}

func (n NoopCpusetManager) AddAlloc(alloc *structs.Allocation) {
Expand Down
92 changes: 43 additions & 49 deletions client/lib/cgutil/cpuset_manager_v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,52 @@ const (
)

// NewCpusetManagerV1 creates a CpusetManager compatible with cgroups.v1
func NewCpusetManagerV1(cgroupParent string, logger hclog.Logger) CpusetManager {
func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager {
if cgroupParent == "" {
cgroupParent = DefaultCgroupV1Parent
}

cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent)
if err != nil {
logger.Warn("failed to get cgroup path; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}

// ensures that shared cpuset exists and that the cpuset values are copied from the parent if created
if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil {
logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}

parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath)
if err != nil {
logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}

parentCpuset, err := cpuset.Parse(parentCpus)
if err != nil {
logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}

// ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup
if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil {
logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}

if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil {
logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err)
return new(NoopCpusetManager)
}

return &cpusetManagerV1{
cgroupParent: cgroupParent,
cgroupInfo: map[string]allocTaskCgroupInfo{},
logger: logger,
parentCpuset: parentCpuset,
cgroupParent: cgroupParent,
cgroupParentPath: cgroupParentPath,
cgroupInfo: map[string]allocTaskCgroupInfo{},
logger: logger,
}
}

Expand Down Expand Up @@ -140,48 +178,11 @@ type allocTaskCgroupInfo map[string]*TaskCgroupInfo
// Init checks that the cgroup parent and expected child cgroups have been created
// If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared
// cgroup is initialized.
func (c *cpusetManagerV1) Init(_ []uint16) error {
cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", c.cgroupParent)
if err != nil {
return err
}
c.cgroupParentPath = cgroupParentPath

// ensures that shared cpuset exists and that the cpuset values are copied from the parent if created
if err := cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil {
return err
}

parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath)
if err != nil {
return fmt.Errorf("failed to detect parent cpuset settings: %v", err)
}
c.parentCpuset, err = cpuset.Parse(parentCpus)
if err != nil {
return fmt.Errorf("failed to parse parent cpuset.cpus setting: %v", err)
}

// ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup
if err := os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err == nil {
// cgroup created, leave cpuset.cpus empty but copy cpuset.mems from parent
if err != nil {
return err
}
} else if !os.IsExist(err) {
return err
}

if err := cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil {
return err
}

func (c *cpusetManagerV1) Init() {
c.doneCh = make(chan struct{})
c.signalCh = make(chan struct{})

c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String())

go c.reconcileLoop()
return nil
}

func (c *cpusetManagerV1) reconcileLoop() {
Expand Down Expand Up @@ -339,13 +340,6 @@ func getCPUsFromCgroupV1(group string) ([]uint16, error) {
return stats.CPUSetStats.CPUs, nil
}

func getParentV1(parent string) string {
if parent == "" {
return DefaultCgroupV1Parent
}
return parent
}

// cpusetEnsureParentV1 makes sure that the parent directories of current
// are created and populated with the proper cpus and mems files copied
// from their respective parent. It does that recursively, starting from
Expand Down
15 changes: 5 additions & 10 deletions client/lib/cgutil/cpuset_manager_v1_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
"github.com/stretchr/testify/require"
)

func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func()) {
func tmpCpusetManagerV1(t *testing.T) (*cpusetManagerV1, func()) {
mount, err := FindCgroupMountpointDir()
if err != nil || mount == "" {
t.Skipf("Failed to find cgroup mount: %v %v", mount, err)
Expand All @@ -25,15 +25,10 @@ func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func())
parent := "/gotest-" + uuid.Short()
require.NoError(t, cpusetEnsureParentV1(parent))

manager = &cpusetManagerV1{
cgroupParent: parent,
cgroupInfo: map[string]allocTaskCgroupInfo{},
logger: testlog.HCLogger(t),
}

parentPath, err := GetCgroupPathHelperV1("cpuset", parent)
require.NoError(t, err)

manager := NewCpusetManagerV1(parent, nil, testlog.HCLogger(t)).(*cpusetManagerV1)
return manager, func() { require.NoError(t, cgroups.RemovePaths(map[string]string{"cpuset": parentPath})) }
}

Expand All @@ -42,7 +37,7 @@ func TestCpusetManager_V1_Init(t *testing.T) {

manager, cleanup := tmpCpusetManagerV1(t)
defer cleanup()
require.NoError(t, manager.Init(nil))
manager.Init()

require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName))
require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus"))
Expand All @@ -59,7 +54,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) {

manager, cleanup := tmpCpusetManagerV1(t)
defer cleanup()
require.NoError(t, manager.Init(nil))
manager.Init()

alloc := mock.Alloc()
// reserve just one core (the 0th core, which probably exists)
Expand Down Expand Up @@ -114,7 +109,7 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) {

manager, cleanup := tmpCpusetManagerV1(t)
defer cleanup()
require.NoError(t, manager.Init(nil))
manager.Init()

alloc1 := mock.Alloc()
alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0])
Expand Down
Loading