Add job containers package
* Add `JobContainer` and `JobProcess` types as the two types that represent a job container
and a process in a job container.
* Add logic to find the executable that a job container is being asked to run.
* Add logic to launch the container as a specific user.
* Add logic to mount the container's scratch space to a directory on the host.
* Add a small subset of tests to the jobobject package.

Signed-off-by: Daniel Canter <[email protected]>
dcantah committed Mar 8, 2021
1 parent 60a28f3 commit 6321a7a
Showing 23 changed files with 1,811 additions and 121 deletions.
197 changes: 110 additions & 87 deletions internal/hcsoci/hcsdoc_wcow.go
@@ -11,7 +11,6 @@ import (
 	"github.com/Microsoft/hcsshim/internal/layers"
 	"github.com/Microsoft/hcsshim/internal/log"
-	"github.com/Microsoft/hcsshim/internal/logfields"
 	"github.com/Microsoft/hcsshim/internal/oci"
 	"github.com/Microsoft/hcsshim/internal/processorinfo"
 	"github.com/Microsoft/hcsshim/internal/schema1"
@@ -20,6 +19,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/uvmfolder"
 	"github.com/Microsoft/hcsshim/internal/wclayer"
 	"github.com/Microsoft/hcsshim/osversion"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
 )

@@ -85,6 +85,55 @@ func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mount
 	return &config, nil
 }
 
+// ConvertCPULimits handles the logic of converting and validating the container's CPU limits
+// specified in the OCI spec to what HCS expects.
+//
+// `cid` is the container's ID.
+//
+// `spec` is the OCI spec for the container.
+//
+// `maxCPUCount` is the maximum CPU count allowed for the container. This value should
+// be the number of processors on the host, or, in the case of a hypervisor-isolated container,
+// the number of processors assigned to the guest/Utility VM.
+//
+// Returns the CPU count, CPU limit, and CPU weight, in this order. Returns an error if more
+// than one of CPU count, CPU limit, or CPU weight was specified in the OCI spec, as they are
+// mutually exclusive.
+func ConvertCPULimits(ctx context.Context, cid string, spec *specs.Spec, maxCPUCount int32) (int32, int32, int32, error) {
+	cpuNumSet := 0
+	cpuCount := oci.ParseAnnotationsCPUCount(ctx, spec, oci.AnnotationContainerProcessorCount, 0)
+	if cpuCount > 0 {
+		cpuNumSet++
+	}
+
+	cpuLimit := oci.ParseAnnotationsCPULimit(ctx, spec, oci.AnnotationContainerProcessorLimit, 0)
+	if cpuLimit > 0 {
+		cpuNumSet++
+	}
+
+	cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, spec, oci.AnnotationContainerProcessorWeight, 0)
+	if cpuWeight > 0 {
+		cpuNumSet++
+	}
+
+	if cpuNumSet > 1 {
+		return 0, 0, 0, fmt.Errorf("invalid spec - Windows Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight)
+	} else if cpuNumSet == 1 {
+		if cpuCount > maxCPUCount {
+			log.G(ctx).WithFields(logrus.Fields{
+				"cid":       cid,
+				"requested": cpuCount,
+				"assigned":  maxCPUCount,
+			}).Warn("Changing user requested CPUCount to current number of processors")
+			cpuCount = maxCPUCount
+		}
+	}
+	return cpuCount, cpuLimit, cpuWeight, nil
+}
+
 // createWindowsContainerDocument creates documents for passing to HCS or GCS to create
 // a container, both hosted and process isolated. It creates both v1 and v2
 // container objects, WCOW only. The container's storage should have been mounted already.
@@ -122,100 +171,74 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
 		v2Container.GuestOs = &hcsschema.GuestOs{HostName: coi.Spec.Hostname}
 	}
 
-	// CPU Resources
-	cpuNumSet := 0
-	cpuCount := oci.ParseAnnotationsCPUCount(ctx, coi.Spec, oci.AnnotationContainerProcessorCount, 0)
-	if cpuCount > 0 {
-		cpuNumSet++
-	}
-
-	cpuLimit := oci.ParseAnnotationsCPULimit(ctx, coi.Spec, oci.AnnotationContainerProcessorLimit, 0)
-	if cpuLimit > 0 {
-		cpuNumSet++
-	}
-
-	cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, coi.Spec, oci.AnnotationContainerProcessorWeight, 0)
-	if cpuWeight > 0 {
-		cpuNumSet++
-	}
-
-	if cpuNumSet > 1 {
-		return nil, nil, fmt.Errorf("invalid spec - Windows Process Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight)
-	} else if cpuNumSet == 1 {
-		hostCPUCount := processorinfo.ProcessorCount()
-		// usableCPUCount is the number of processors present in whatever environment
-		// the container is running in. It will be either the processor count of the
-		// host, or the UVM, based on if the container is process or hypervisor isolated.
-		usableCPUCount := hostCPUCount
-		var uvmCPUCount int32
-		if coi.HostingSystem != nil {
-			uvmCPUCount = coi.HostingSystem.ProcessorCount()
-			usableCPUCount = uvmCPUCount
-		}
-		if cpuCount > usableCPUCount {
-			l := log.G(ctx).WithField(logfields.ContainerID, coi.ID)
-			if coi.HostingSystem != nil {
-				l.Data[logfields.UVMID] = coi.HostingSystem.ID()
-			}
-			l.WithFields(logrus.Fields{
-				"requested": cpuCount,
-				"assigned":  usableCPUCount,
-			}).Warn("Changing user requested CPUCount to current number of processors")
-			cpuCount = usableCPUCount
-		}
-		if coi.ScaleCPULimitsToSandbox && cpuLimit > 0 && coi.HostingSystem != nil {
-			// When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume
-			// the CPU limit has been calculated based on the number of processors on the
-			// host, and instead re-calculate it based on the number of processors in the UVM.
-			//
-			// This is needed to work correctly with assumptions kubelet makes when computing
-			// the CPU limit value:
-			// - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of
-			//   cores. So if 2000 millicores are assigned, the container can use 2 processors.
-			// - In Windows, the job object CPU limit is global across all processors on the
-			//   system, and is represented as a fraction out of 10000. In this model, a limit
-			//   of 10000 means the container can use all processors fully, regardless of how
-			//   many processors exist on the system.
-			// - To convert the millicores value into the job object limit, kubelet divides
-			//   the millicores by the number of CPU cores on the host. This causes problems
-			//   when running inside a UVM, as the UVM may have a different number of processors
-			//   than the host system.
-			//
-			// To work around this, we undo the division by the number of host processors, and
-			// re-do the division based on the number of processors inside the UVM. This will
-			// give the correct value based on the actual number of millicores that the kubelet
-			// wants the container to have.
-			//
-			// Kubelet formula to compute CPU limit:
-			// cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000
-			newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
-			// We only apply bounds here because we are calculating the CPU limit ourselves,
-			// and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000].
-			// In the case where we use the value directly from the user, we don't alter it to fit
-			// within the bounds, but just let the platform throw an error if it is invalid.
-			if newCPULimit < 1 {
-				newCPULimit = 1
-			} else if newCPULimit > 10000 {
-				newCPULimit = 10000
-			}
-			log.G(ctx).WithFields(logrus.Fields{
-				"hostCPUCount": hostCPUCount,
-				"uvmCPUCount":  uvmCPUCount,
-				"oldCPULimit":  cpuLimit,
-				"newCPULimit":  newCPULimit,
-			}).Info("rescaling CPU limit for UVM sandbox")
-			cpuLimit = newCPULimit
-		}
-	}
-
-	v1.ProcessorCount = uint32(cpuCount)
-	v1.ProcessorMaximum = int64(cpuLimit)
-	v1.ProcessorWeight = uint64(cpuWeight)
-
-	v2Container.Processor = &hcsschema.Processor{
-		Count:   cpuCount,
-		Maximum: cpuLimit,
-		Weight:  cpuWeight,
-	}
+	var (
+		uvmCPUCount  int32
+		hostCPUCount = processorinfo.ProcessorCount()
+		maxCPUCount  = hostCPUCount
+	)
+
+	if coi.HostingSystem != nil {
+		uvmCPUCount = coi.HostingSystem.ProcessorCount()
+		maxCPUCount = uvmCPUCount
+	}
+
+	cpuCount, cpuLimit, cpuWeight, err := ConvertCPULimits(ctx, coi.ID, coi.Spec, maxCPUCount)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	if coi.HostingSystem != nil && coi.ScaleCPULimitsToSandbox && cpuLimit > 0 {
+		// When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume
+		// the CPU limit has been calculated based on the number of processors on the
+		// host, and instead re-calculate it based on the number of processors in the UVM.
+		//
+		// This is needed to work correctly with assumptions kubelet makes when computing
+		// the CPU limit value:
+		// - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of
+		//   cores. So if 2000 millicores are assigned, the container can use 2 processors.
+		// - In Windows, the job object CPU limit is global across all processors on the
+		//   system, and is represented as a fraction out of 10000. In this model, a limit
+		//   of 10000 means the container can use all processors fully, regardless of how
+		//   many processors exist on the system.
+		// - To convert the millicores value into the job object limit, kubelet divides
+		//   the millicores by the number of CPU cores on the host. This causes problems
+		//   when running inside a UVM, as the UVM may have a different number of processors
+		//   than the host system.
+		//
+		// To work around this, we undo the division by the number of host processors, and
+		// re-do the division based on the number of processors inside the UVM. This will
+		// give the correct value based on the actual number of millicores that the kubelet
+		// wants the container to have.
+		//
+		// Kubelet formula to compute CPU limit:
+		// cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000
+		newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
+		// We only apply bounds here because we are calculating the CPU limit ourselves,
+		// and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000].
+		// In the case where we use the value directly from the user, we don't alter it to fit
+		// within the bounds, but just let the platform throw an error if it is invalid.
+		if newCPULimit < 1 {
+			newCPULimit = 1
+		} else if newCPULimit > 10000 {
+			newCPULimit = 10000
+		}
+		log.G(ctx).WithFields(logrus.Fields{
+			"hostCPUCount": hostCPUCount,
+			"uvmCPUCount":  uvmCPUCount,
+			"oldCPULimit":  cpuLimit,
+			"newCPULimit":  newCPULimit,
+		}).Info("rescaling CPU limit for UVM sandbox")
+		cpuLimit = newCPULimit
+	}
+
+	v1.ProcessorCount = uint32(cpuCount)
+	v1.ProcessorMaximum = int64(cpuLimit)
+	v1.ProcessorWeight = uint64(cpuWeight)
+
+	v2Container.Processor = &hcsschema.Processor{
+		Count:   cpuCount,
+		Maximum: cpuLimit,
+		Weight:  cpuWeight,
+	}
 
 	// Memory Resources
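To make the rescaling above concrete, here is a small, self-contained sketch (not part of the commit; the helper name is invented for illustration) that reproduces the arithmetic from the comment: the host-relative job object limit that kubelet computed is undone and re-derived against the UVM's processor count, then clamped to the job object's valid range of [1, 10000].

package main

import "fmt"

// rescaleCPULimitForUVM mirrors the arithmetic in createWindowsContainerDocument:
// undo kubelet's division by the host processor count, re-divide by the UVM
// processor count, and clamp to the job object range [1, 10000].
func rescaleCPULimitForUVM(cpuLimit, hostCPUCount, uvmCPUCount int32) int32 {
	newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
	if newCPULimit < 1 {
		newCPULimit = 1
	} else if newCPULimit > 10000 {
		newCPULimit = 10000
	}
	return newCPULimit
}

func main() {
	// Kubelet example: 2000 millicores requested on a 16-core host gives
	// cpuMaximum = 10000 * 2000 / 16 / 1000 = 1250, which is host-relative.
	hostCPUCount, uvmCPUCount := int32(16), int32(4)
	cpuLimit := int32(1250)

	// Against a 4-processor UVM the limit rescales to 5000, i.e. half of the
	// UVM's processors, which is the 2 cores kubelet actually intended.
	fmt.Println(rescaleCPULimitForUVM(cpuLimit, hostCPUCount, uvmCPUCount))
}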
28 changes: 28 additions & 0 deletions internal/jobcontainers/cpurate_test.go
@@ -0,0 +1,28 @@
package jobcontainers

import (
	"testing"
)

func assertEqual(t *testing.T, a uint32, b uint32) {
	if a != b {
		t.Fatalf("%d != %d", a, b)
	}
}

func TestJobCPURate(t *testing.T) {
	rate := calculateJobCPURate(10, 1)
	assertEqual(t, rate, 1000)

	rate = calculateJobCPURate(10, 5)
	assertEqual(t, rate, 5000)

	rate = calculateJobCPURate(20, 5)
	assertEqual(t, rate, 2500)

	rate = calculateJobCPURate(1, 1)
	assertEqual(t, rate, 10000)

	rate = calculateJobCPURate(1, 0)
	assertEqual(t, rate, 1)
}
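The calculateJobCPURate implementation itself is not among the files shown here; a minimal sketch consistent with the expectations in this test (the requested processor count expressed as a fraction of host processors, scaled to the job object's 10000-based rate, with a floor of 1) might look like the following. The parameter names are assumptions.

// Sketch only: the real implementation lives elsewhere in the jobcontainers package.
// calculateJobCPURate converts a requested processor count into a job object CPU
// rate, where 10000 means "all host processors"; a floor of 1 keeps the rate valid
// when the requested count rounds down to zero.
func calculateJobCPURate(hostProcs uint32, processorCount uint32) uint32 {
	rate := (processorCount * 10000) / hostProcs
	if rate == 0 {
		return 1
	}
	return rate
}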
46 changes: 46 additions & 0 deletions internal/jobcontainers/env.go
@@ -0,0 +1,46 @@
package jobcontainers

import (
	"unicode/utf16"
	"unsafe"

	"github.com/pkg/errors"
	"golang.org/x/sys/windows"
)

// defaultEnvBlock will return a new environment block in the context of the user token
// `token`.
//
// This is almost a direct copy of the go stdlib implementation with some slight changes
// to force a valid token to be passed.
// https://github.com/golang/go/blob/f21be2fdc6f1becdbed1592ea0b245cdeedc5ac8/src/internal/syscall/execenv/execenv_windows.go#L24
func defaultEnvBlock(token windows.Token) (env []string, err error) {
	if token == 0 {
		return nil, errors.New("invalid token for creating environment block")
	}

	var block *uint16
	if err := windows.CreateEnvironmentBlock(&block, token, false); err != nil {
		return nil, err
	}
	defer windows.DestroyEnvironmentBlock(block)

	blockp := uintptr(unsafe.Pointer(block))
	for {
		// find NUL terminator
		end := unsafe.Pointer(blockp)
		for *(*uint16)(end) != 0 {
			end = unsafe.Pointer(uintptr(end) + 2)
		}

		n := (uintptr(end) - uintptr(unsafe.Pointer(blockp))) / 2
		if n == 0 {
			// environment block ends with empty string
			break
		}
		entry := (*[(1 << 30) - 1]uint16)(unsafe.Pointer(blockp))[:n:n]
		env = append(env, string(utf16.Decode(entry)))
		blockp += 2 * (uintptr(len(entry)) + 1)
	}
	return
}
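As a usage illustration (not part of the commit), defaultEnvBlock can be exercised from inside the jobcontainers package with the current process token obtained via windows.OpenCurrentProcessToken; a job container would instead pass the token of the user it is being launched as. The helper below is hypothetical.

package jobcontainers

import (
	"fmt"

	"golang.org/x/sys/windows"
)

// printDefaultEnv is an illustrative helper only: it builds the default
// environment block for the current process token and prints each KEY=VALUE
// entry that defaultEnvBlock decoded from the Win32 environment block.
func printDefaultEnv() error {
	token, err := windows.OpenCurrentProcessToken()
	if err != nil {
		return err
	}
	defer token.Close()

	env, err := defaultEnvBlock(token)
	if err != nil {
		return err
	}
	for _, kv := range env {
		fmt.Println(kv) // e.g. "SystemRoot=C:\WINDOWS"
	}
	return nil
}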
