From 6321a7ae93db4e3249ae7c64714327e34b2b493f Mon Sep 17 00:00:00 2001 From: Daniel Canter Date: Fri, 4 Dec 2020 17:29:35 -0800 Subject: [PATCH] Add job containers package * Add `JobContainer` and `JobProcess` types as the two types to represent a job container and a process in a job container. * Add logic to find the executable being asked to run for a job container. * Logic to launch the container as specific user. * Logic to mount the containers scratch space on the host to a directory. * Small subset of tests added to jobobject package Signed-off-by: Daniel Canter --- internal/hcsoci/hcsdoc_wcow.go | 197 +++++----- internal/jobcontainers/cpurate_test.go | 28 ++ internal/jobcontainers/env.go | 46 +++ internal/jobcontainers/jobcontainer.go | 449 +++++++++++++++++++++++ internal/jobcontainers/logon.go | 60 +++ internal/jobcontainers/oci.go | 72 ++++ internal/jobcontainers/path.go | 228 ++++++++++++ internal/jobcontainers/path_test.go | 53 +++ internal/jobcontainers/process.go | 233 ++++++++++++ internal/jobcontainers/storage.go | 93 +++++ internal/jobobject/jobobject.go | 27 +- internal/jobobject/jobobject_test.go | 237 +++++++++++- internal/jobobject/limits.go | 13 +- internal/layers/layers.go | 2 +- internal/oci/uvm.go | 5 + internal/winapi/memory.go | 16 + internal/winapi/path.go | 2 +- internal/winapi/process.go | 7 + internal/winapi/thread.go | 3 + internal/winapi/winapi.go | 2 +- internal/winapi/zsyscall_windows.go | 70 +++- test/cri-containerd/jobcontainer_test.go | 87 +++++ test/cri-containerd/main.go | 2 + 23 files changed, 1811 insertions(+), 121 deletions(-) create mode 100644 internal/jobcontainers/cpurate_test.go create mode 100644 internal/jobcontainers/env.go create mode 100644 internal/jobcontainers/jobcontainer.go create mode 100644 internal/jobcontainers/logon.go create mode 100644 internal/jobcontainers/oci.go create mode 100644 internal/jobcontainers/path.go create mode 100644 internal/jobcontainers/path_test.go create mode 100644 internal/jobcontainers/process.go create mode 100644 internal/jobcontainers/storage.go create mode 100644 internal/winapi/thread.go create mode 100644 test/cri-containerd/jobcontainer_test.go diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go index 09a9150103..439740fe9b 100644 --- a/internal/hcsoci/hcsdoc_wcow.go +++ b/internal/hcsoci/hcsdoc_wcow.go @@ -11,7 +11,6 @@ import ( "github.com/Microsoft/hcsshim/internal/layers" "github.com/Microsoft/hcsshim/internal/log" - "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/oci" "github.com/Microsoft/hcsshim/internal/processorinfo" "github.com/Microsoft/hcsshim/internal/schema1" @@ -20,6 +19,7 @@ import ( "github.com/Microsoft/hcsshim/internal/uvmfolder" "github.com/Microsoft/hcsshim/internal/wclayer" "github.com/Microsoft/hcsshim/osversion" + specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) @@ -85,6 +85,55 @@ func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mount return &config, nil } +// ConvertCPULimits handles the logic of converting and validating the containers CPU limits +// specified in the OCI spec to what HCS expects. +// +// `cid` is the container's ID. +// +// `vmid` is the Utility VM's ID if the container we're constructing is going to belong to +// one. +// +// `spec` is the OCI spec for the container. +// +// `maxCPUCount` is the maximum cpu count allowed for the container. This value should +// be the number of processors on the host, or in the case of a hypervisor isolated container +// the number of processors assigned to the guest/Utility VM. +// +// Returns the cpu count, cpu limit, and cpu weight in this order. Returns an error if more than one of +// cpu count, cpu limit, or cpu weight was specified in the OCI spec as they are mutually +// exclusive. +func ConvertCPULimits(ctx context.Context, cid string, spec *specs.Spec, maxCPUCount int32) (int32, int32, int32, error) { + cpuNumSet := 0 + cpuCount := oci.ParseAnnotationsCPUCount(ctx, spec, oci.AnnotationContainerProcessorCount, 0) + if cpuCount > 0 { + cpuNumSet++ + } + + cpuLimit := oci.ParseAnnotationsCPULimit(ctx, spec, oci.AnnotationContainerProcessorLimit, 0) + if cpuLimit > 0 { + cpuNumSet++ + } + + cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, spec, oci.AnnotationContainerProcessorWeight, 0) + if cpuWeight > 0 { + cpuNumSet++ + } + + if cpuNumSet > 1 { + return 0, 0, 0, fmt.Errorf("invalid spec - Windows Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight) + } else if cpuNumSet == 1 { + if cpuCount > maxCPUCount { + log.G(ctx).WithFields(logrus.Fields{ + "cid": cid, + "requested": cpuCount, + "assigned": maxCPUCount, + }).Warn("Changing user requested CPUCount to current number of processors") + cpuCount = maxCPUCount + } + } + return cpuCount, cpuLimit, cpuWeight, nil +} + // createWindowsContainerDocument creates documents for passing to HCS or GCS to create // a container, both hosted and process isolated. It creates both v1 and v2 // container objects, WCOW only. The containers storage should have been mounted already. @@ -122,100 +171,74 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter v2Container.GuestOs = &hcsschema.GuestOs{HostName: coi.Spec.Hostname} } - // CPU Resources - cpuNumSet := 0 - cpuCount := oci.ParseAnnotationsCPUCount(ctx, coi.Spec, oci.AnnotationContainerProcessorCount, 0) - if cpuCount > 0 { - cpuNumSet++ - } + var ( + uvmCPUCount int32 + hostCPUCount = processorinfo.ProcessorCount() + maxCPUCount = hostCPUCount + ) - cpuLimit := oci.ParseAnnotationsCPULimit(ctx, coi.Spec, oci.AnnotationContainerProcessorLimit, 0) - if cpuLimit > 0 { - cpuNumSet++ + if coi.HostingSystem != nil { + uvmCPUCount = coi.HostingSystem.ProcessorCount() + maxCPUCount = uvmCPUCount } - cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, coi.Spec, oci.AnnotationContainerProcessorWeight, 0) - if cpuWeight > 0 { - cpuNumSet++ + cpuCount, cpuLimit, cpuWeight, err := ConvertCPULimits(ctx, coi.ID, coi.Spec, maxCPUCount) + if err != nil { + return nil, nil, err } - if cpuNumSet > 1 { - return nil, nil, fmt.Errorf("invalid spec - Windows Process Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight) - } else if cpuNumSet == 1 { - hostCPUCount := processorinfo.ProcessorCount() - // usableCPUCount is the number of processors present in whatever environment - // the container is running in. It will be either the processor count of the - // host, or the UVM, based on if the container is process or hypervisor isolated. - usableCPUCount := hostCPUCount - var uvmCPUCount int32 - if coi.HostingSystem != nil { - uvmCPUCount = coi.HostingSystem.ProcessorCount() - usableCPUCount = uvmCPUCount - } - if cpuCount > usableCPUCount { - l := log.G(ctx).WithField(logfields.ContainerID, coi.ID) - if coi.HostingSystem != nil { - l.Data[logfields.UVMID] = coi.HostingSystem.ID() - } - l.WithFields(logrus.Fields{ - "requested": cpuCount, - "assigned": usableCPUCount, - }).Warn("Changing user requested CPUCount to current number of processors") - cpuCount = usableCPUCount - } - if coi.ScaleCPULimitsToSandbox && cpuLimit > 0 && coi.HostingSystem != nil { - // When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume - // the CPU limit has been calculated based on the number of processors on the - // host, and instead re-calculate it based on the number of processors in the UVM. - // - // This is needed to work correctly with assumptions kubelet makes when computing - // the CPU limit value: - // - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of - // cores. So if 2000 millicores are assigned, the container can use 2 processors. - // - In Windows, the job object CPU limit is global across all processors on the - // system, and is represented as a fraction out of 10000. In this model, a limit - // of 10000 means the container can use all processors fully, regardless of how - // many processors exist on the system. - // - To convert the millicores value into the job object limit, kubelet divides - // the millicores by the number of CPU cores on the host. This causes problems - // when running inside a UVM, as the UVM may have a different number of processors - // than the host system. - // - // To work around this, we undo the division by the number of host processors, and - // re-do the division based on the number of processors inside the UVM. This will - // give the correct value based on the actual number of millicores that the kubelet - // wants the container to have. - // - // Kubelet formula to compute CPU limit: - // cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000 - newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount - // We only apply bounds here because we are calculating the CPU limit ourselves, - // and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000]. - // In the case where we use the value directly from the user, we don't alter it to fit - // within the bounds, but just let the platform throw an error if it is invalid. - if newCPULimit < 1 { - newCPULimit = 1 - } else if newCPULimit > 10000 { - newCPULimit = 10000 - } - log.G(ctx).WithFields(logrus.Fields{ - "hostCPUCount": hostCPUCount, - "uvmCPUCount": uvmCPUCount, - "oldCPULimit": cpuLimit, - "newCPULimit": newCPULimit, - }).Info("rescaling CPU limit for UVM sandbox") - cpuLimit = newCPULimit + if coi.HostingSystem != nil && coi.ScaleCPULimitsToSandbox && cpuLimit > 0 { + // When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume + // the CPU limit has been calculated based on the number of processors on the + // host, and instead re-calculate it based on the number of processors in the UVM. + // + // This is needed to work correctly with assumptions kubelet makes when computing + // the CPU limit value: + // - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of + // cores. So if 2000 millicores are assigned, the container can use 2 processors. + // - In Windows, the job object CPU limit is global across all processors on the + // system, and is represented as a fraction out of 10000. In this model, a limit + // of 10000 means the container can use all processors fully, regardless of how + // many processors exist on the system. + // - To convert the millicores value into the job object limit, kubelet divides + // the millicores by the number of CPU cores on the host. This causes problems + // when running inside a UVM, as the UVM may have a different number of processors + // than the host system. + // + // To work around this, we undo the division by the number of host processors, and + // re-do the division based on the number of processors inside the UVM. This will + // give the correct value based on the actual number of millicores that the kubelet + // wants the container to have. + // + // Kubelet formula to compute CPU limit: + // cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000 + newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount + // We only apply bounds here because we are calculating the CPU limit ourselves, + // and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000]. + // In the case where we use the value directly from the user, we don't alter it to fit + // within the bounds, but just let the platform throw an error if it is invalid. + if newCPULimit < 1 { + newCPULimit = 1 + } else if newCPULimit > 10000 { + newCPULimit = 10000 } + log.G(ctx).WithFields(logrus.Fields{ + "hostCPUCount": hostCPUCount, + "uvmCPUCount": uvmCPUCount, + "oldCPULimit": cpuLimit, + "newCPULimit": newCPULimit, + }).Info("rescaling CPU limit for UVM sandbox") + cpuLimit = newCPULimit + } - v1.ProcessorCount = uint32(cpuCount) - v1.ProcessorMaximum = int64(cpuLimit) - v1.ProcessorWeight = uint64(cpuWeight) + v1.ProcessorCount = uint32(cpuCount) + v1.ProcessorMaximum = int64(cpuLimit) + v1.ProcessorWeight = uint64(cpuWeight) - v2Container.Processor = &hcsschema.Processor{ - Count: cpuCount, - Maximum: cpuLimit, - Weight: cpuWeight, - } + v2Container.Processor = &hcsschema.Processor{ + Count: cpuCount, + Maximum: cpuLimit, + Weight: cpuWeight, } // Memory Resources diff --git a/internal/jobcontainers/cpurate_test.go b/internal/jobcontainers/cpurate_test.go new file mode 100644 index 0000000000..486b42f783 --- /dev/null +++ b/internal/jobcontainers/cpurate_test.go @@ -0,0 +1,28 @@ +package jobcontainers + +import ( + "testing" +) + +func assertEqual(t *testing.T, a uint32, b uint32) { + if a != b { + t.Fatalf("%d != %d", a, b) + } +} + +func TestJobCPURate(t *testing.T) { + rate := calculateJobCPURate(10, 1) + assertEqual(t, rate, 1000) + + rate = calculateJobCPURate(10, 5) + assertEqual(t, rate, 5000) + + rate = calculateJobCPURate(20, 5) + assertEqual(t, rate, 2500) + + rate = calculateJobCPURate(1, 1) + assertEqual(t, rate, 10000) + + rate = calculateJobCPURate(1, 0) + assertEqual(t, rate, 1) +} diff --git a/internal/jobcontainers/env.go b/internal/jobcontainers/env.go new file mode 100644 index 0000000000..ed1017fbf2 --- /dev/null +++ b/internal/jobcontainers/env.go @@ -0,0 +1,46 @@ +package jobcontainers + +import ( + "unicode/utf16" + "unsafe" + + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +// defaultEnvBlock will return a new environment block in the context of the user token +// `token`. +// +// This is almost a direct copy of the go stdlib implementation with some slight changes +// to force a valid token to be passed. +// https://github.com/golang/go/blob/f21be2fdc6f1becdbed1592ea0b245cdeedc5ac8/src/internal/syscall/execenv/execenv_windows.go#L24 +func defaultEnvBlock(token windows.Token) (env []string, err error) { + if token == 0 { + return nil, errors.New("invalid token for creating environment block") + } + + var block *uint16 + if err := windows.CreateEnvironmentBlock(&block, token, false); err != nil { + return nil, err + } + defer windows.DestroyEnvironmentBlock(block) + + blockp := uintptr(unsafe.Pointer(block)) + for { + // find NUL terminator + end := unsafe.Pointer(blockp) + for *(*uint16)(end) != 0 { + end = unsafe.Pointer(uintptr(end) + 2) + } + + n := (uintptr(end) - uintptr(unsafe.Pointer(blockp))) / 2 + if n == 0 { + // environment block ends with empty string + break + } + entry := (*[(1 << 30) - 1]uint16)(unsafe.Pointer(blockp))[:n:n] + env = append(env, string(utf16.Decode(entry))) + blockp += 2 * (uintptr(len(entry)) + 1) + } + return +} diff --git a/internal/jobcontainers/jobcontainer.go b/internal/jobcontainers/jobcontainer.go new file mode 100644 index 0000000000..c4f589357b --- /dev/null +++ b/internal/jobcontainers/jobcontainer.go @@ -0,0 +1,449 @@ +package jobcontainers + +import ( + "context" + "fmt" + "os" + "os/exec" + "regexp" + "strings" + "sync" + "syscall" + "time" + + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/Microsoft/hcsshim/internal/cow" + "github.com/Microsoft/hcsshim/internal/hcs" + "github.com/Microsoft/hcsshim/internal/jobobject" + "github.com/Microsoft/hcsshim/internal/layers" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/queue" + "github.com/Microsoft/hcsshim/internal/schema1" + hcsschema "github.com/Microsoft/hcsshim/internal/schema2" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/windows" +) + +// Split arguments but ignore spaces in quotes. +// +// For example instead of: +// "\"Hello good\" morning world" --> ["\"Hello", "good\"", "morning", "world"] +// we get ["\"Hello good\"", "morning", "world"] +func splitArgs(cmdLine string) []string { + r := regexp.MustCompile(`[^\s"]+|"([^"]*)"`) + return r.FindAllString(cmdLine, -1) +} + +// Convert environment map to a slice of environment variables in the form [Key1=val1, key2=val2] +func envMapToSlice(m map[string]string) []string { + var s []string + for k, v := range m { + s = append(s, k+"="+v) + } + return s +} + +const ( + jobContainerNameFmt = "JobContainer_%s" + // Environment variable set in every process in the job detailing where the containers volume + // is mounted on the host. + sandboxMountPointEnvVar = "CONTAINER_SANDBOX_MOUNT_POINT" +) + +type initProc struct { + initDoOnce sync.Once + proc *JobProcess + initBlock chan struct{} +} + +// JobContainer represents a lightweight container composed from a job object. +type JobContainer struct { + id string + spec *specs.Spec // OCI spec used to create the container + job *jobobject.JobObject // Object representing the job object the container owns + sandboxMount string // Path to where the sandbox is mounted on the host + m sync.Mutex + closedWaitOnce sync.Once + init initProc + startTimestamp time.Time + exited chan struct{} + waitBlock chan struct{} + waitError error +} + +var _ cow.ProcessHost = &JobContainer{} +var _ cow.Container = &JobContainer{} + +func newJobContainer(id string, s *specs.Spec) *JobContainer { + return &JobContainer{ + id: id, + spec: s, + waitBlock: make(chan struct{}), + exited: make(chan struct{}), + init: initProc{initBlock: make(chan struct{})}, + } +} + +// Create creates a new JobContainer from `s`. +func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, err error) { + log.G(ctx).WithField("id", id).Debug("Creating job container") + + if s == nil { + return nil, errors.New("Spec must be supplied") + } + + if id == "" { + g, err := guid.NewV4() + if err != nil { + return nil, err + } + id = g.String() + } + + if err := mountLayers(ctx, s); err != nil { + return nil, errors.Wrap(err, "failed to mount container layers") + } + + volumeGUIDRegex := `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}(|\\)$` + if matched, err := regexp.MatchString(volumeGUIDRegex, s.Root.Path); !matched || err != nil { + return nil, fmt.Errorf(`invalid container spec - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, s.Root.Path) + } + if s.Root.Path[len(s.Root.Path)-1] != '\\' { + s.Root.Path += `\` // Be nice to clients and make sure well-formed for back-compat + } + + container := newJobContainer(id, s) + + // Create the job object all processes will run in. + options := &jobobject.Options{ + Name: fmt.Sprintf(jobContainerNameFmt, id), + Notifications: true, + } + job, err := jobobject.Create(ctx, options) + if err != nil { + return nil, errors.Wrap(err, "failed to create job object") + } + + // Parity with how we handle process isolated containers. We set the same flag which + // behaves the same way for a silo. + if err := job.SetTerminateOnLastHandleClose(); err != nil { + return nil, errors.Wrap(err, "failed to set terminate on last handle close on job container") + } + container.job = job + + var path string + defer func() { + if err != nil { + container.Close() + if path != "" { + removeSandboxMountPoint(ctx, path) + } + } + }() + + limits, err := specToLimits(ctx, id, s) + if err != nil { + return nil, errors.Wrap(err, "failed to convert OCI spec to job object limits") + } + + // Set resource limits on the job object based off of oci spec. + if err := job.SetResourceLimits(limits); err != nil { + return nil, errors.Wrap(err, "failed to set resource limits") + } + + // Setup directory sandbox volume will be mounted + sandboxPath := fmt.Sprintf(sandboxMountFormat, id) + if _, err := os.Stat(sandboxPath); os.IsNotExist(err) { + if err := os.MkdirAll(sandboxPath, 0777); err != nil { + return nil, errors.Wrap(err, "failed to create mounted folder") + } + } + path = sandboxPath + + if err := mountSandboxVolume(ctx, path, s.Root.Path); err != nil { + return nil, errors.Wrap(err, "failed to bind payload directory on host") + } + + container.sandboxMount = path + go container.waitBackground(ctx) + return container, nil +} + +// CreateProcess creates a process on the host, starts it, adds it to the containers +// job object and then waits for exit. +func (c *JobContainer) CreateProcess(ctx context.Context, config interface{}) (_ cow.Process, err error) { + conf, ok := config.(*hcsschema.ProcessParameters) + if !ok { + return nil, errors.New("unsupported process config passed in") + } + + if conf.EmulateConsole { + return nil, errors.New("console emulation not supported for job containers") + } + + absPath, commandLine, err := getApplicationName(conf.CommandLine, c.sandboxMount, os.Getenv("PATH")) + if err != nil { + return nil, errors.Wrapf(err, "failed to get application name from commandline %q", conf.CommandLine) + } + + commandLine = strings.ReplaceAll(commandLine, "%"+sandboxMountPointEnvVar+"%", c.sandboxMount) + commandLine = strings.ReplaceAll(commandLine, "$env:"+sandboxMountPointEnvVar, c.sandboxMount) + + var token windows.Token + if getUserTokenInheritAnnotation(c.spec.Annotations) { + token, err = openCurrentProcessToken() + if err != nil { + return nil, err + } + } else { + token, err = processToken(conf.User) + if err != nil { + return nil, errors.Wrap(err, "failed to create user process token") + } + } + defer token.Close() + + env, err := defaultEnvBlock(token) + if err != nil { + return nil, errors.Wrap(err, "failed to get default environment block") + } + env = append(env, envMapToSlice(conf.Environment)...) + env = append(env, sandboxMountPointEnvVar+"="+c.sandboxMount) + + cmd := &exec.Cmd{ + Env: env, + Dir: c.sandboxMount, + Path: absPath, + Args: splitArgs(commandLine), + SysProcAttr: &syscall.SysProcAttr{ + CreationFlags: windows.CREATE_NEW_PROCESS_GROUP, + Token: syscall.Token(token), + }, + } + process := newProcess(cmd) + + // Create process pipes if asked for. + if conf.CreateStdInPipe { + stdin, err := process.cmd.StdinPipe() + if err != nil { + return nil, errors.Wrap(err, "failed to create stdin pipe") + } + process.stdin = stdin + } + + if conf.CreateStdOutPipe { + stdout, err := process.cmd.StdoutPipe() + if err != nil { + return nil, errors.Wrap(err, "failed to create stdout pipe") + } + process.stdout = stdout + } + + if conf.CreateStdErrPipe { + stderr, err := process.cmd.StderrPipe() + if err != nil { + return nil, errors.Wrap(err, "failed to create stderr pipe") + } + process.stderr = stderr + } + + defer func() { + if err != nil { + process.Close() + } + }() + + if err = process.Start(); err != nil { + return nil, errors.Wrap(err, "failed to start host process") + } + + if err = c.job.Assign(uint32(process.Pid())); err != nil { + return nil, errors.Wrap(err, "failed to assign process to job object") + } + + // Assign the first process made as the init process of the container. + c.init.initDoOnce.Do(func() { + c.init.proc = process + close(c.init.initBlock) + }) + + // Wait for process exit + go c.pollJobMsgs(ctx) + go process.waitBackground(ctx) + return process, nil +} + +func (c *JobContainer) Modify(ctx context.Context, config interface{}) (err error) { + return errors.New("modify not supported for job containers") +} + +// Release unmounts all of the container layers. Safe to call multiple times, if no storage +// is mounted this call will just return nil. +func (c *JobContainer) Release(ctx context.Context) error { + c.m.Lock() + defer c.m.Unlock() + + log.G(ctx).WithFields(logrus.Fields{ + "id": c.id, + "path": c.sandboxMount, + }).Warn("removing sandbox volume mount") + + if c.sandboxMount != "" { + if err := removeSandboxMountPoint(ctx, c.sandboxMount); err != nil { + return errors.Wrap(err, "failed to remove sandbox volume mount path") + } + if err := layers.UnmountContainerLayers(ctx, c.spec.Windows.LayerFolders, "", nil, layers.UnmountOperationAll); err != nil { + return errors.Wrap(err, "failed to unmount container layers") + } + c.sandboxMount = "" + } + return nil +} + +// Start starts the container. There's nothing to "start" for job containers, so this just +// sets the start timestamp. +func (c *JobContainer) Start(ctx context.Context) error { + c.startTimestamp = time.Now() + return nil +} + +// Close closes any open handles. +func (c *JobContainer) Close() error { + if err := c.job.Close(); err != nil { + return err + } + c.closedWaitOnce.Do(func() { + c.waitError = hcs.ErrAlreadyClosed + close(c.waitBlock) + }) + return nil +} + +// ID returns the ID of the container. This is the name used to create the job object. +func (c *JobContainer) ID() string { + return c.id +} + +// Shutdown gracefully shuts down the container. +func (c *JobContainer) Shutdown(ctx context.Context) error { + log.G(ctx).WithField("id", c.id).Debug("shutting down job container") + + ctx, cancel := context.WithTimeout(ctx, time.Second*5) + defer cancel() + return c.shutdown(ctx) +} + +// shutdown will loop through all the pids in the container and send a signal to exit. +// If there are no processes in the container it will early return nil. +// If the all processes exited message is not received within the context timeout set, it will +// terminate the job. +func (c *JobContainer) shutdown(ctx context.Context) error { + pids, err := c.job.Pids() + if err != nil { + return errors.Wrap(err, "failed to get pids in container") + } + + if len(pids) == 0 { + return nil + } + + for _, pid := range pids { + // If any process can't be signaled just wait until the timeout hits + if err := signalProcess(pid, windows.CTRL_SHUTDOWN_EVENT); err != nil { + log.G(ctx).WithField("pid", pid).Error("failed to signal process in job container") + } + } + + select { + case <-c.exited: + case <-ctx.Done(): + return c.Terminate(ctx) + } + return nil +} + +// PropertiesV2 returns properties relating to the job container. This is an HCS construct but +// to adhere to the interface for containers on Windows it is partially implemented. The only +// supported property is schema2.PTStatistics. +func (c *JobContainer) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) { + return nil, errors.New("`PropertiesV2` call is not implemented for job containers") +} + +// Properties is not implemented for job containers. This is just to satisfy the cow.Container interface. +func (c *JobContainer) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) { + return nil, errors.New("`Properties` call is not implemented for job containers") +} + +// Terminate terminates the job object (kills every process in the job). +func (c *JobContainer) Terminate(ctx context.Context) error { + log.G(ctx).WithField("id", c.id).Debug("terminating job container") + + if err := c.job.Terminate(1); err != nil { + return errors.Wrap(err, "failed to terminate job container") + } + return nil +} + +// Wait synchronously waits for the container to shutdown or terminate. If +// the container has already exited returns the previous error (if any). +func (c *JobContainer) Wait() error { + <-c.waitBlock + return c.waitError +} + +func (c *JobContainer) waitBackground(ctx context.Context) { + // Wait for there to be an init process assigned. + <-c.init.initBlock + + // Once the init process finishes, if there's any other processes in the container we need to signal + // them to exit. + <-c.init.proc.waitBlock + + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + if err := c.Shutdown(ctx); err != nil { + c.Terminate(ctx) + } + + c.closedWaitOnce.Do(func() { + c.waitError = c.init.proc.waitError + close(c.waitBlock) + }) +} + +// Polls for notifications from the job objects assigned IO completion port. +func (c *JobContainer) pollJobMsgs(ctx context.Context) { + for { + notif, err := c.job.PollNotification() + if err != nil { + // Queues closed or we somehow aren't registered to receive notifications. There won't be + // any notifications arriving so we're safe to return. + if err == queue.ErrQueueClosed || err == jobobject.ErrNotRegistered { + return + } + log.G(ctx).WithError(err).Warn("error while polling for job container notification") + } + + switch msg := notif.(type) { + // All processes have exited. Close the waitblock so we can cleanup and then return. + case jobobject.MsgAllProcessesExited: + close(c.exited) + return + case jobobject.MsgUnimplemented: + default: + log.G(ctx).WithField("message", msg).Warn("unknown job object notification encountered") + } + } +} + +// IsOCI - Just to satisfy the cow.ProcessHost interface. Follow the WCOW behavior +func (c *JobContainer) IsOCI() bool { + return false +} + +// OS returns the operating system name as a string. This should always be windows. +func (c *JobContainer) OS() string { + return "windows" +} diff --git a/internal/jobcontainers/logon.go b/internal/jobcontainers/logon.go new file mode 100644 index 0000000000..01ac968c04 --- /dev/null +++ b/internal/jobcontainers/logon.go @@ -0,0 +1,60 @@ +package jobcontainers + +import ( + "fmt" + "strings" + + "github.com/Microsoft/hcsshim/internal/winapi" + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +// processToken returns a user token for the user specified by `user`. This should be in the form +// of either a DOMAIN\username or just username. +func processToken(user string) (windows.Token, error) { + var ( + domain string + userName string + token windows.Token + ) + + split := strings.Split(user, "\\") + if len(split) == 2 { + domain = split[0] + userName = split[1] + } else if len(split) == 1 { + userName = split[0] + } else { + return 0, fmt.Errorf("invalid user string `%s`", user) + } + + if user == "" { + return 0, errors.New("empty user string passed") + } + + logonType := winapi.LOGON32_LOGON_INTERACTIVE + // User asking to run as a local system account (NETWORK SERVICE, LOCAL SERVICE, SYSTEM) + if domain == "NT AUTHORITY" { + logonType = winapi.LOGON32_LOGON_SERVICE + } + + if err := winapi.LogonUser( + windows.StringToUTF16Ptr(userName), + windows.StringToUTF16Ptr(domain), + nil, + logonType, + winapi.LOGON32_PROVIDER_DEFAULT, + &token, + ); err != nil { + return 0, errors.Wrap(err, "failed to logon user") + } + return token, nil +} + +func openCurrentProcessToken() (windows.Token, error) { + var token windows.Token + if err := windows.OpenProcessToken(windows.CurrentProcess(), windows.TOKEN_ALL_ACCESS, &token); err != nil { + return 0, errors.Wrap(err, "failed to open current process token") + } + return token, nil +} diff --git a/internal/jobcontainers/oci.go b/internal/jobcontainers/oci.go new file mode 100644 index 0000000000..d78adfe43a --- /dev/null +++ b/internal/jobcontainers/oci.go @@ -0,0 +1,72 @@ +package jobcontainers + +import ( + "context" + + "github.com/Microsoft/hcsshim/internal/hcsoci" + "github.com/Microsoft/hcsshim/internal/processorinfo" + + "github.com/Microsoft/hcsshim/internal/jobobject" + + "github.com/Microsoft/hcsshim/internal/oci" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// This file contains helpers for converting parts of the oci spec to useful +// structures/limits to be applied to a job object. +func calculateJobCPUWeight(processorWeight uint32) uint32 { + if processorWeight == 0 { + return 0 + } + return 1 + uint32((8*processorWeight)/jobobject.CPUWeightMax) +} + +func calculateJobCPURate(hostProcs uint32, processorCount uint32) uint32 { + rate := (processorCount * 10000) / hostProcs + if rate == 0 { + return 1 + } + return rate +} + +func getUserTokenInheritAnnotation(annotations map[string]string) bool { + val, ok := annotations[oci.AnnotationHostProcessInheritUser] + return ok && val == "true" +} + +// Oci spec to job object limit information. Will do any conversions to job object specific values from +// their respective OCI representations. E.g. we convert CPU count into the correct job object cpu +// rate value internally. +func specToLimits(ctx context.Context, cid string, s *specs.Spec) (*jobobject.JobLimits, error) { + hostCPUCount := processorinfo.ProcessorCount() + cpuCount, cpuLimit, cpuWeight, err := hcsoci.ConvertCPULimits(ctx, cid, s, hostCPUCount) + if err != nil { + return nil, err + } + + realCPULimit, realCPUWeight := uint32(cpuLimit), uint32(cpuWeight) + if cpuCount != 0 { + // Job object API does not support "CPU count". Instead, we translate the notion of "count" into + // CPU limit, which represents the amount of the host system's processors that the job can use to + // a percentage times 100. For example, to let the job use 20% of the available LPs the rate would + // be 20 times 100, or 2,000. + realCPULimit = calculateJobCPURate(uint32(hostCPUCount), uint32(cpuCount)) + } else if cpuWeight != 0 { + realCPUWeight = calculateJobCPUWeight(realCPUWeight) + } + + // Memory limit + memLimitMB := oci.ParseAnnotationsMemory(ctx, s, oci.AnnotationContainerMemorySizeInMB, 0) + + // IO limits + maxBandwidth := int64(oci.ParseAnnotationsStorageBps(ctx, s, oci.AnnotationContainerStorageQoSBandwidthMaximum, 0)) + maxIops := int64(oci.ParseAnnotationsStorageIops(ctx, s, oci.AnnotationContainerStorageQoSIopsMaximum, 0)) + + return &jobobject.JobLimits{ + CPULimit: realCPULimit, + CPUWeight: realCPUWeight, + MaxIOPS: maxIops, + MaxBandwidth: maxBandwidth, + MemoryLimitInBytes: memLimitMB * 1024 * 1024, + }, nil +} diff --git a/internal/jobcontainers/path.go b/internal/jobcontainers/path.go new file mode 100644 index 0000000000..ba68535c3d --- /dev/null +++ b/internal/jobcontainers/path.go @@ -0,0 +1,228 @@ +package jobcontainers + +import ( + "fmt" + "os" + "strings" + + "github.com/Microsoft/hcsshim/internal/winapi" + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +// This file emulates the path resolution logic that is used for launching regular +// process and hypervisor isolated Windows containers. + +// getApplicationName resolves a given command line string and returns the path to the executable that should be launched, and +// an adjusted commandline if needed. The resolution logic may appear overcomplicated but is designed to match the logic used by +// standard Windows containers, as well as that used by CreateProcess (see notes for the lpApplicationName parameter). +// +// The logic follows this set of steps: +// - Construct a list of searchable paths to find the application. This includes the standard Windows system paths +// which are generally located at C:\Windows, C:\Windows\System32 and C:\Windows\System. If a working directory or path is specified +// via the `workingDirectory` or `pathEnv` parameters then these will be appended to the paths to search from as well. The +// searching logic is handled by the Windows API function `SearchPathW` which accepts a semicolon separated list of paths to search +// in. +// https://docs.microsoft.com/en-us/windows/win32/api/processenv/nf-processenv-searchpathw +// +// - If the commandline is quoted, simply grab whatever is in the quotes and search for this directly. +// We don't try any other logic here, if the application can't be found from the quoted contents we return an error. +// +// - If the commandline is not quoted, we iterate over each possible application name by splitting the arguments and iterating +// over them one by one while appending the last search each time until we either find a match or don't and return +// an error. If we don't find the application on the first try, this means that the application name has a space in it +// and we must adjust the commandline to add quotes around the application name. +// +// - If the application is found, we return the fullpath to the executable and the adjusted commandline (if needed). +// +// Examples: +// - Input: "C:\Program Files\sub dir\program name" +// Search order: +// - C:\Program.exe +// - C:\Program Files\sub.exe +// - C:\Program Files\sub dir\program.exe +// - C:\Program Files\sub dir\program name.exe +// Returned commandline: "\"C:\Program Files\sub dir\program name\"" +// +// - Input: "\"program name\"" +// Search order: +// - program name.exe +// Returned commandline: "\"program name\" +// +// - Input: "\"program name\" -flags -for -program" +// Search order: +// - program.exe +// - program name.exe +// Returned commandline: "\"program name\" -flags -for -program" +// +// - Input: "\"C:\path\to\program name\"" +// Search Order: +// - "C:\path\to\program name.exe" +// Returned commandline: "\"C:\path\to\program name"" +// +// - Input: "C:\path\to\program" +// Search Order: +// - "C:\path\to\program.exe" +// Returned commandline: "C:\path\to\program" +// +// CreateProcess documentation: https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessa +func getApplicationName(commandLine, workingDirectory, pathEnv string) (string, string, error) { + var ( + searchPath string + result string + ) + // First we get the system paths concatenated with semicolons (C:\windows;C:\windows\system32;C:\windows\system;) + // and use this as the basis for the directories to search for the application. + systemPaths, err := getSystemPaths() + if err != nil { + return "", "", err + } + + // If there's a working directory we should also add this to the list of directories to search. + if workingDirectory != "" { + searchPath += workingDirectory + ";" + } + + // Append the path environment to the list of directories to search. + if pathEnv != "" { + searchPath += pathEnv + ";" + } + searchPath += systemPaths + + if searchPath[len(searchPath)-1] == ';' { + searchPath = searchPath[:len(searchPath)-1] + } + + // Application name was quoted, just search directly. + // + // For example given the commandline: "hello goodbye" -foo -bar -baz + // we would search for the executable 'hello goodbye.exe' + if commandLine != "" && commandLine[0] == '"' { + index := strings.Index(commandLine[1:], "\"") + if index == -1 { + return "", "", errors.New("no ending quotation mark found in command") + } + path, err := searchPathForExe(commandLine[1:index+1], searchPath) + if err != nil { + return "", "", err + } + return path, commandLine, nil + } + + // Application name wasn't quoted, try each possible application name. + // For example given the commandline: hello goodbye, we would first try + // to find 'hello.exe' and then 'hello goodbye.exe' + var ( + trialName string + quoteCmdLine bool + argsIndex int + ) + args := splitArgs(commandLine) + + // Loop through each element of the commandline and try and determine if any of them are executables. + // + // For example given the commandline: foo bar baz + // if foo.exe is successfully found we will stop and return with the full path to 'foo.exe'. If foo doesn't succeed we + // then try 'foo bar.exe' and 'foo bar baz.exe'. + for argsIndex < len(args) { + trialName += args[argsIndex] + fullPath, err := searchPathForExe(trialName, searchPath) + if err == nil { + result = fullPath + break + } + trialName += " " + quoteCmdLine = true + argsIndex++ + } + + // If we searched through every argument and didn't find an executable, we need to error out. + if argsIndex == len(args) { + return "", "", fmt.Errorf("failed to find executable %q", commandLine) + } + + // If we found an executable but after we concatenated two arguments together, + // we need to adjust the commandline to be quoted. + // + // For example given the commandline: foo bar + // if 'foo bar.exe' is found, we need to adjust the commandline to + // be quoted as this is what the platform expects (CreateProcess call). + adjustedCommandLine := commandLine + if quoteCmdLine { + trialName = "\"" + trialName + "\"" + trialName += " " + strings.Join(args[argsIndex+1:], " ") + adjustedCommandLine = trialName + } + + return result, adjustedCommandLine, nil +} + +// searchPathForExe calls the Windows API function `SearchPathW` to try and locate +// `fileName` by searching in `pathsToSearch`. `pathsToSearch` is generally a semicolon +// seperated string of paths to search that `SearchPathW` will iterate through one by one. +// If the path resolved for `fileName` ends up being a directory, this function will return an +// error. +func searchPathForExe(fileName, pathsToSearch string) (string, error) { + fileNamePtr, err := windows.UTF16PtrFromString(fileName) + if err != nil { + return "", err + } + + pathsToSearchPtr, err := windows.UTF16PtrFromString(pathsToSearch) + if err != nil { + return "", err + } + + extension, err := windows.UTF16PtrFromString(".exe") + if err != nil { + return "", err + } + + path := make([]uint16, windows.MAX_PATH) + _, err = winapi.SearchPath( + pathsToSearchPtr, + fileNamePtr, + extension, + windows.MAX_PATH, + &path[0], + nil, + ) + if err != nil { + return "", err + } + + exePath := windows.UTF16PtrToString(&path[0]) + // Need to check if we just found a directory with the name of the executable and + // .exe at the end. ping.exe is a perfectly valid directory name for example. + attrs, err := os.Stat(exePath) + if err != nil { + return "", err + } + + if attrs.IsDir() { + return "", fmt.Errorf("found directory instead of executable %q", exePath) + } + + return exePath, nil +} + +// Returns the system paths (system32, system, and windows) as a search path, +// including a terminating ;. +// +// Typical output would be `C:\WINDOWS\system32;C:\WINDOWS\System;C:\WINDOWS;` +func getSystemPaths() (string, error) { + var searchPath string + systemDir, err := windows.GetSystemDirectory() + if err != nil { + return "", errors.Wrap(err, "failed to get system directory") + } + searchPath += systemDir + ";" + + windowsDir, err := windows.GetWindowsDirectory() + if err != nil { + return "", errors.Wrap(err, "failed to get Windows directory") + } + + searchPath += windowsDir + "\\System;" + windowsDir + ";" + return searchPath, nil +} diff --git a/internal/jobcontainers/path_test.go b/internal/jobcontainers/path_test.go new file mode 100644 index 0000000000..9f631cff31 --- /dev/null +++ b/internal/jobcontainers/path_test.go @@ -0,0 +1,53 @@ +package jobcontainers + +import ( + "os" + "os/exec" + "testing" +) + +func TestSearchPath(t *testing.T) { + // Testing that relative paths work. + _, err := searchPathForExe("windows\\system32\\ping", "C:\\") + if err != nil { + t.Fatal(err) + } + + _, err = searchPathForExe("system32\\ping", "C:\\windows") + if err != nil { + t.Fatal(err) + } +} + +func TestGetApplicationName(t *testing.T) { + cwd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + + _, _, err = getApplicationName("ping", cwd, os.Getenv("PATH")) + if err != nil { + t.Fatal(err) + } + + // Test that we only find the first element of the commandline if the binary exists. + _, _, err = getApplicationName("ping test", cwd, os.Getenv("PATH")) + if err != nil { + t.Fatal(err) + } + + // Test quoted application name with an argument afterwards. + path, cmdLine, err := getApplicationName("\"ping\" 127.0.0.1", cwd, os.Getenv("PATH")) + if err != nil { + t.Fatal(err) + } + + args := splitArgs(cmdLine) + cmd := &exec.Cmd{ + Path: path, + Args: args, + } + if err := cmd.Run(); err != nil { + t.Fatal(err) + } +} diff --git a/internal/jobcontainers/process.go b/internal/jobcontainers/process.go new file mode 100644 index 0000000000..8d8e9e0f3b --- /dev/null +++ b/internal/jobcontainers/process.go @@ -0,0 +1,233 @@ +package jobcontainers + +import ( + "context" + "fmt" + "io" + "os/exec" + "sync" + + "github.com/Microsoft/hcsshim/internal/cow" + "github.com/Microsoft/hcsshim/internal/guestrequest" + "github.com/Microsoft/hcsshim/internal/hcs" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/winapi" + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +// JobProcess represents a process run in a job object. +type JobProcess struct { + cmd *exec.Cmd + procLock sync.Mutex + stdioLock sync.Mutex + stdin io.WriteCloser + stdout io.ReadCloser + stderr io.ReadCloser + waitBlock chan struct{} + closedWaitOnce sync.Once + waitError error +} + +var sigMap = map[string]int{ + "CtrlC": windows.CTRL_BREAK_EVENT, + "CtrlBreak": windows.CTRL_BREAK_EVENT, + "CtrlClose": windows.CTRL_CLOSE_EVENT, + "CtrlLogOff": windows.CTRL_LOGOFF_EVENT, + "CtrlShutdown": windows.CTRL_SHUTDOWN_EVENT, +} + +var _ cow.Process = &JobProcess{} + +func newProcess(cmd *exec.Cmd) *JobProcess { + return &JobProcess{ + cmd: cmd, + waitBlock: make(chan struct{}), + } +} + +func (p *JobProcess) ResizeConsole(ctx context.Context, width, height uint16) error { + return nil +} + +// Stdio returns the stdio pipes of the process +func (p *JobProcess) Stdio() (io.Writer, io.Reader, io.Reader) { + return p.stdin, p.stdout, p.stderr +} + +// Signal sends a signal to the process and returns whether the signal was delivered. +func (p *JobProcess) Signal(ctx context.Context, options interface{}) (bool, error) { + p.procLock.Lock() + defer p.procLock.Unlock() + + if p.exited() { + return false, errors.New("signal not sent. process has already exited") + } + + // If options is nil it's assumed we got a sigterm + if options == nil { + if err := p.cmd.Process.Kill(); err != nil { + return false, err + } + return true, nil + } + + signalOptions, ok := options.(*guestrequest.SignalProcessOptionsWCOW) + if !ok { + return false, errors.New("unknown signal options") + } + + signal, ok := sigMap[string(signalOptions.Signal)] + if !ok { + return false, fmt.Errorf("unknown signal %s encountered", signalOptions.Signal) + } + + if err := signalProcess(uint32(p.cmd.Process.Pid), signal); err != nil { + return false, errors.Wrap(err, "failed to send signal") + } + return true, nil +} + +// CloseStdin closes the stdin pipe of the process. +func (p *JobProcess) CloseStdin(ctx context.Context) error { + p.stdioLock.Lock() + defer p.stdioLock.Unlock() + return p.stdin.Close() +} + +// Wait waits for the process to exit. If the process has already exited returns +// the previous error (if any). +func (p *JobProcess) Wait() error { + <-p.waitBlock + return p.waitError +} + +// Start starts the job object process +func (p *JobProcess) Start() error { + return p.cmd.Start() +} + +// This should only be called once. +func (p *JobProcess) waitBackground(ctx context.Context) { + log.G(ctx).WithField("pid", p.Pid()).Debug("waitBackground for JobProcess") + + // Wait for process to get signaled/exit/terminate/. + err := p.cmd.Wait() + + // Wait closes the stdio pipes so theres no need to later on. + p.stdioLock.Lock() + p.stdin = nil + p.stdout = nil + p.stderr = nil + p.stdioLock.Unlock() + + p.closedWaitOnce.Do(func() { + p.waitError = err + close(p.waitBlock) + }) +} + +// ExitCode returns the exit code of the process. +func (p *JobProcess) ExitCode() (int, error) { + p.procLock.Lock() + defer p.procLock.Unlock() + + if !p.exited() { + return -1, errors.New("process has not exited") + } + return p.cmd.ProcessState.ExitCode(), nil +} + +// Pid returns the processes PID +func (p *JobProcess) Pid() int { + if process := p.cmd.Process; process != nil { + return process.Pid + } + return 0 +} + +// Close cleans up any state associated with the process but does not kill it. +func (p *JobProcess) Close() error { + p.stdioLock.Lock() + if p.stdin != nil { + p.stdin.Close() + p.stdin = nil + } + if p.stdout != nil { + p.stdout.Close() + p.stdout = nil + } + if p.stderr != nil { + p.stderr.Close() + p.stderr = nil + } + p.stdioLock.Unlock() + + p.closedWaitOnce.Do(func() { + p.waitError = hcs.ErrAlreadyClosed + close(p.waitBlock) + }) + return nil +} + +// Kill signals the process to terminate. +// Returns a bool signifying whether the signal was successfully delivered. +func (p *JobProcess) Kill(ctx context.Context) (bool, error) { + log.G(ctx).WithField("pid", p.Pid()).Debug("killing job process") + + p.procLock.Lock() + defer p.procLock.Unlock() + + if p.exited() { + return false, errors.New("kill not sent. process already exited") + } + + if p.cmd.Process != nil { + if err := p.cmd.Process.Kill(); err != nil { + return false, err + } + } + return true, nil +} + +func (p *JobProcess) exited() bool { + if p.cmd.ProcessState == nil { + return false + } + return p.cmd.ProcessState.Exited() +} + +// signalProcess sends the specified signal to a process. +func signalProcess(pid uint32, signal int) error { + hProc, err := windows.OpenProcess(winapi.PROCESS_ALL_ACCESS, true, pid) + if err != nil { + return errors.Wrap(err, "failed to open process") + } + defer windows.Close(hProc) + + // We can't use GenerateConsoleCtrlEvent since that only supports CTRL_C_EVENT and CTRL_BREAK_EVENT. + // Instead, to handle an arbitrary signal we open a CtrlRoutine thread inside the target process and + // give it the specified signal to handle. This is safe even with ASLR as even though kernel32.dll's + // location will be randomized each boot, it will be in the same address for every process. This is why + // we're able to get the address from a different process and use this as the start address for the routine + // that the thread will run. + // + // Note: This is a hack which is not officially supported. + k32, err := windows.LoadLibrary("kernel32.dll") + if err != nil { + return errors.Wrap(err, "failed to load kernel32 library") + } + defer windows.Close(k32) + + proc, err := windows.GetProcAddress(k32, "CtrlRoutine") + if err != nil { + return errors.Wrap(err, "failed to load CtrlRoutine") + } + + threadHandle, err := winapi.CreateRemoteThread(hProc, nil, 0, proc, uintptr(signal), 0, nil) + if err != nil { + return errors.Wrapf(err, "failed to open remote thread in target process %d", pid) + } + defer windows.Close(threadHandle) + return nil +} diff --git a/internal/jobcontainers/storage.go b/internal/jobcontainers/storage.go new file mode 100644 index 0000000000..270d44def2 --- /dev/null +++ b/internal/jobcontainers/storage.go @@ -0,0 +1,93 @@ +package jobcontainers + +import ( + "context" + "os" + "path/filepath" + + "github.com/Microsoft/hcsshim/internal/layers" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/wclayer" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/windows" +) + +// Trailing backslash required for SetVolumeMountPoint and DeleteVolumeMountPoint +const sandboxMountFormat = `C:\C\%s\` + +func mountLayers(ctx context.Context, s *specs.Spec) error { + if s == nil || s.Windows == nil || s.Windows.LayerFolders == nil { + return errors.New("field 'Spec.Windows.Layerfolders' is not populated") + } + + // Last layer always contains the sandbox.vhdx, or 'scratch' space for the container. + scratchFolder := s.Windows.LayerFolders[len(s.Windows.LayerFolders)-1] + if _, err := os.Stat(scratchFolder); os.IsNotExist(err) { + if err := os.MkdirAll(scratchFolder, 0777); err != nil { + return errors.Wrapf(err, "failed to auto-create container scratch folder %s", scratchFolder) + } + } + + // Create sandbox.vhdx if it doesn't exist in the scratch folder. + if _, err := os.Stat(filepath.Join(scratchFolder, "sandbox.vhdx")); os.IsNotExist(err) { + if err := wclayer.CreateScratchLayer(ctx, scratchFolder, s.Windows.LayerFolders[:len(s.Windows.LayerFolders)-1]); err != nil { + return errors.Wrap(err, "failed to CreateSandboxLayer") + } + } + + if s.Root == nil { + s.Root = &specs.Root{} + } + + if s.Root.Path == "" { + log.G(ctx).Debug("mounting job container storage") + containerRootPath, err := layers.MountContainerLayers(ctx, s.Windows.LayerFolders, "", nil) + if err != nil { + return errors.Wrap(err, "failed to mount container storage") + } + s.Root.Path = containerRootPath + } + return nil +} + +// Mount the sandbox vhd to a user friendly path. +func mountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err error) { + log.G(ctx).WithFields(logrus.Fields{ + "hostpath": hostPath, + "volumeName": volumeName, + }).Debug("mounting sandbox volume for job container") + + if _, err := os.Stat(hostPath); os.IsNotExist(err) { + if err := os.MkdirAll(hostPath, 0777); err != nil { + return err + } + } + + defer func() { + if err != nil { + os.RemoveAll(hostPath) + } + }() + + if err = windows.SetVolumeMountPoint(windows.StringToUTF16Ptr(hostPath), windows.StringToUTF16Ptr(volumeName)); err != nil { + return errors.Wrapf(err, "failed to mount sandbox volume to %s on host", hostPath) + } + return nil +} + +// Remove volume mount point. And remove folder afterwards. +func removeSandboxMountPoint(ctx context.Context, hostPath string) error { + log.G(ctx).WithFields(logrus.Fields{ + "hostpath": hostPath, + }).Debug("mounting sandbox volume for job container") + + if err := windows.DeleteVolumeMountPoint(windows.StringToUTF16Ptr(hostPath)); err != nil { + return errors.Wrap(err, "failed to delete sandbox volume mount point") + } + if err := os.Remove(hostPath); err != nil { + return errors.Wrapf(err, "failed to remove sandbox mounted folder path %q", hostPath) + } + return nil +} diff --git a/internal/jobobject/jobobject.go b/internal/jobobject/jobobject.go index 98e4c51557..fe8992bc71 100644 --- a/internal/jobobject/jobobject.go +++ b/internal/jobobject/jobobject.go @@ -2,7 +2,6 @@ package jobobject import ( "context" - "fmt" "sync" "unsafe" @@ -196,6 +195,13 @@ func Open(ctx context.Context, options *Options) (_ *JobObject, err error) { // helper function to setup notifications for creating/opening a job object func setupNotifications(ctx context.Context, job *JobObject) (*queue.MessageQueue, error) { + job.handleLock.RLock() + defer job.handleLock.RUnlock() + + if job.handle == 0 { + return nil, ErrAlreadyClosed + } + ioInitOnce.Do(func() { h, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0xffffffff) if err != nil { @@ -210,18 +216,11 @@ func setupNotifications(ctx context.Context, job *JobObject) (*queue.MessageQueu return nil, initIOErr } - job.handleLock.RLock() - defer job.handleLock.RUnlock() - - if job.handle == 0 { - return nil, ErrAlreadyClosed - } - mq := queue.NewMessageQueue() jobMap.Store(uintptr(job.handle), mq) if err := attachIOCP(job.handle, ioCompletionPort); err != nil { jobMap.Delete(uintptr(job.handle)) - return nil, err + return nil, errors.Wrap(err, "failed to attach job to IO completion port") } return mq, nil } @@ -326,7 +325,7 @@ func (job *JobObject) Pids() ([]uint32, error) { } if err != winapi.ERROR_MORE_DATA { - return nil, fmt.Errorf("failed initial query for PIDs in job object: %s", err) + return nil, errors.Wrap(err, "failed initial query for PIDs in job object") } jobBasicProcessIDListSize := unsafe.Sizeof(info) + (unsafe.Sizeof(info.ProcessIdList[0]) * uintptr(info.NumberOfAssignedProcesses-1)) @@ -338,7 +337,7 @@ func (job *JobObject) Pids() ([]uint32, error) { uint32(len(buf)), nil, ); err != nil { - return nil, fmt.Errorf("failed to query for PIDs in job object: %s", err) + return nil, errors.Wrap(err, "failed to query for PIDs in job object") } bufInfo := (*winapi.JOBOBJECT_BASIC_PROCESS_ID_LIST)(unsafe.Pointer(&buf[0])) @@ -367,7 +366,7 @@ func (job *JobObject) QueryMemoryStats() (*winapi.JOBOBJECT_MEMORY_USAGE_INFORMA uint32(unsafe.Sizeof(info)), nil, ); err != nil { - return nil, fmt.Errorf("failed to query for job object memory stats: %s", err) + return nil, errors.Wrap(err, "failed to query for job object memory stats") } return &info, nil } @@ -389,7 +388,7 @@ func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_ uint32(unsafe.Sizeof(info)), nil, ); err != nil { - return nil, fmt.Errorf("failed to query for job object process stats: %s", err) + return nil, errors.Wrap(err, "failed to query for job object process stats") } return &info, nil } @@ -411,7 +410,7 @@ func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_BASIC_AND_IO_ACCOUN uint32(unsafe.Sizeof(info)), nil, ); err != nil { - return nil, fmt.Errorf("failed to query for job object storage stats: %s", err) + return nil, errors.Wrap(err, "failed to query for job object storage stats") } return &info, nil } diff --git a/internal/jobobject/jobobject_test.go b/internal/jobobject/jobobject_test.go index 8169d3d1bd..d251dab9a8 100644 --- a/internal/jobobject/jobobject_test.go +++ b/internal/jobobject/jobobject_test.go @@ -2,7 +2,13 @@ package jobobject import ( "context" + "os/exec" + "syscall" "testing" + "time" + + "github.com/pkg/errors" + "golang.org/x/sys/windows" ) func TestJobNilOptions(t *testing.T) { @@ -18,13 +24,240 @@ func TestJobCreateAndOpen(t *testing.T) { options = &Options{Name: "test"} ) - _, err := Create(ctx, options) + jobCreate, err := Create(ctx, options) + if err != nil { + t.Fatal(err) + } + defer jobCreate.Close() + + jobOpen, err := Open(ctx, options) + if err != nil { + t.Fatal(err) + } + defer jobOpen.Close() +} + +func createProcsAndAssign(num int, job *JobObject) (_ []*exec.Cmd, err error) { + var procs []*exec.Cmd + + defer func() { + if err != nil { + for _, proc := range procs { + proc.Process.Kill() + } + } + }() + + for i := 0; i < num; i++ { + cmd := exec.Command("ping", "-t", "127.0.0.1") + cmd.SysProcAttr = &syscall.SysProcAttr{ + CreationFlags: windows.CREATE_NEW_PROCESS_GROUP, + } + + if err := cmd.Start(); err != nil { + return nil, err + } + + if err := job.Assign(uint32(cmd.Process.Pid)); err != nil { + return nil, err + } + procs = append(procs, cmd) + } + return procs, nil +} + +func TestSetTerminateOnLastHandleClose(t *testing.T) { + options := &Options{ + Name: "test", + Notifications: true, + } + job, err := Create(context.Background(), options) + if err != nil { + t.Fatal(err) + } + defer job.Close() + + if err := job.SetTerminateOnLastHandleClose(); err != nil { + t.Fatal(err) + } + + procs, err := createProcsAndAssign(1, job) + if err != nil { + t.Fatal(err) + } + + errCh := make(chan error) + go func() { + if err := job.Close(); err != nil { + errCh <- err + } + if err := procs[0].Wait(); err != nil { + errCh <- err + } + // Check if process is still alive after job handle close (it should not be). + // If wait returned it should be gone but just to be explicit check anyways. + if !procs[0].ProcessState.Exited() { + errCh <- errors.New("process should have exited after closing job handle") + } + errCh <- nil + }() + + select { + case err := <-errCh: + if err != nil { + t.Fatal(err) + } + case <-time.After(time.Second * 10): + procs[0].Process.Kill() + t.Fatal("process didn't complete wait within timeout") + } +} + +func TestSetMultipleExtendedLimits(t *testing.T) { + // Tests setting two different properties on the job that modify + // JOBOBJECT_EXTENDED_LIMIT_INFORMATION + options := &Options{ + Name: "test", + Notifications: true, + } + job, err := Create(context.Background(), options) + if err != nil { + t.Fatal(err) + } + defer job.Close() + + // No reason for this limit in particular. Could be any value. + memLimitInMB := uint64(10 * 1024 * 1204) + if err := job.SetMemoryLimit(memLimitInMB); err != nil { + t.Fatal(err) + } + + if err := job.SetTerminateOnLastHandleClose(); err != nil { + t.Fatal(err) + } + + eli, err := job.getExtendedInformation() if err != nil { t.Fatal(err) } - _, err = Open(ctx, options) + if !isFlagSet(windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, eli.BasicLimitInformation.LimitFlags) { + t.Fatal("the job does not have cpu rate control enabled") + } + + if !isFlagSet(windows.JOB_OBJECT_LIMIT_JOB_MEMORY, eli.BasicLimitInformation.LimitFlags) { + t.Fatal("the job does not have cpu rate control enabled") + } + + if eli.JobMemoryLimit != uintptr(memLimitInMB) { + t.Fatal("job memory limit not persisted") + } +} + +func TestNoMoreProcessesMessageKill(t *testing.T) { + // Test that we receive the no more processes in job message after killing all of + // the processes in the job. + options := &Options{ + Name: "test", + Notifications: true, + } + job, err := Create(context.Background(), options) if err != nil { t.Fatal(err) } + defer job.Close() + + if err := job.SetTerminateOnLastHandleClose(); err != nil { + t.Fatal(err) + } + + procs, err := createProcsAndAssign(2, job) + if err != nil { + t.Fatal(err) + } + + errCh := make(chan error) + go func() { + for _, proc := range procs { + if err := proc.Process.Kill(); err != nil { + errCh <- err + } + } + + for { + notif, err := job.PollNotification() + if err != nil { + errCh <- err + } + + switch notif.(type) { + case MsgAllProcessesExited: + errCh <- nil + case MsgUnimplemented: + default: + } + } + }() + + select { + case err := <-errCh: + if err != nil { + t.Fatal(err) + } + case <-time.After(time.Second * 10): + t.Fatal("didn't receive no more processes message within timeout") + } +} + +func TestNoMoreProcessesMessageTerminate(t *testing.T) { + // Test that we receive the no more processes in job message after terminating the + // job (terminates every process in the job). + options := &Options{ + Name: "test", + Notifications: true, + } + job, err := Create(context.Background(), options) + if err != nil { + t.Fatal(err) + } + defer job.Close() + + if err := job.SetTerminateOnLastHandleClose(); err != nil { + t.Fatal(err) + } + + _, err = createProcsAndAssign(2, job) + if err != nil { + t.Fatal(err) + } + + errCh := make(chan error) + go func() { + if err := job.Terminate(1); err != nil { + errCh <- err + } + + for { + notif, err := job.PollNotification() + if err != nil { + errCh <- err + } + + switch notif.(type) { + case MsgAllProcessesExited: + errCh <- nil + case MsgUnimplemented: + default: + } + } + }() + + select { + case err := <-errCh: + if err != nil { + t.Fatal(err) + } + case <-time.After(time.Second * 10): + t.Fatal("didn't receive no more processes message within timeout") + } } diff --git a/internal/jobobject/limits.go b/internal/jobobject/limits.go index 990af100cf..4657f9322d 100644 --- a/internal/jobobject/limits.go +++ b/internal/jobobject/limits.go @@ -14,7 +14,7 @@ const ( memoryLimitMax uint64 = 0xffffffffffffffff ) -func isFlagSet(flag uint32, controlFlags uint32) bool { +func isFlagSet(flag, controlFlags uint32) bool { return (flag & controlFlags) == flag } @@ -45,6 +45,17 @@ func (job *JobObject) SetResourceLimits(limits *JobLimits) error { return nil } +// SetTerminateOnLastHandleClose sets the job object flag that specifies that the job should terminate +// all processes in the job on the last open handle being closed. +func (job *JobObject) SetTerminateOnLastHandleClose() error { + info, err := job.getExtendedInformation() + if err != nil { + return err + } + info.BasicLimitInformation.LimitFlags |= windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE + return job.setExtendedInformation(info) +} + // SetMemoryLimit sets the memory limit of the job object based on the given `memoryLimitInBytes`. func (job *JobObject) SetMemoryLimit(memoryLimitInBytes uint64) error { if memoryLimitInBytes >= memoryLimitMax { diff --git a/internal/layers/layers.go b/internal/layers/layers.go index 5f815b783a..3fffd827be 100644 --- a/internal/layers/layers.go +++ b/internal/layers/layers.go @@ -74,7 +74,7 @@ func MountContainerLayers(ctx context.Context, layerFolders []string, guestRoot if uvm == nil { if len(layerFolders) < 2 { - return "", fmt.Errorf("need at least two layers - base and scratch") + return "", errors.New("need at least two layers - base and scratch") } path := layerFolders[len(layerFolders)-1] rest := layerFolders[:len(layerFolders)-1] diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go index 5c2cb3c3b5..9b8c6933aa 100644 --- a/internal/oci/uvm.go +++ b/internal/oci/uvm.go @@ -87,6 +87,11 @@ const ( // files and information needed to install given driver(s). This may include .sys, // .inf, .cer, and/or other files used during standard installation with pnputil. AnnotationAssignedDeviceKernelDrivers = "io.microsoft.assigneddevice.kerneldrivers" + // AnnotationHostProcessInheritUser indicates whether to ignore the username passed in to run a host process + // container as and instead inherit the user token from the executable that is launching the container process. + AnnotationHostProcessInheritUser = "microsoft.com/hostprocess-inherit-user" + // AnnotationHostProcessContainer indicates to launch a host process container (job container in this repository). + AnnotationHostProcessContainer = "microsoft.com/hostprocess-container" annotationAllowOvercommit = "io.microsoft.virtualmachine.computetopology.memory.allowovercommit" annotationEnableDeferredCommit = "io.microsoft.virtualmachine.computetopology.memory.enabledeferredcommit" diff --git a/internal/winapi/memory.go b/internal/winapi/memory.go index ccaf5a624f..83f7040644 100644 --- a/internal/winapi/memory.go +++ b/internal/winapi/memory.go @@ -9,3 +9,19 @@ package winapi //sys LocalAlloc(flags uint32, size int) (ptr uintptr) = kernel32.LocalAlloc //sys LocalFree(ptr uintptr) = kernel32.LocalFree + +// BOOL QueryWorkingSet( +// HANDLE hProcess, +// PVOID pv, +// DWORD cb +// ); +//sys QueryWorkingSet(handle windows.Handle, pv uintptr, cb uint32) (err error) = psapi.QueryWorkingSet + +type PSAPI_WORKING_SET_INFORMATION struct { + NumberOfEntries uintptr + WorkingSetInfo [1]PSAPI_WORKING_SET_BLOCK +} + +type PSAPI_WORKING_SET_BLOCK struct { + Flags uintptr +} diff --git a/internal/winapi/path.go b/internal/winapi/path.go index 0ae8f33ea6..908920e872 100644 --- a/internal/winapi/path.go +++ b/internal/winapi/path.go @@ -8,4 +8,4 @@ package winapi // LPWSTR lpBuffer, // LPWSTR *lpFilePart // ); -//sys SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath **uint16) (size uint32, err error) = kernel32.SearchPathW +//sys SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath *uint16) (size uint32, err error) = kernel32.SearchPathW diff --git a/internal/winapi/process.go b/internal/winapi/process.go index adf0168eae..b87068327c 100644 --- a/internal/winapi/process.go +++ b/internal/winapi/process.go @@ -1,3 +1,10 @@ package winapi const PROCESS_ALL_ACCESS uint32 = 2097151 + +// DWORD GetProcessImageFileNameW( +// HANDLE hProcess, +// LPWSTR lpImageFileName, +// DWORD nSize +// ); +//sys GetProcessImageFileName(hProcess windows.Handle, imageFileName *uint16, nSize uint32) (size uint32, err error) = kernel32.GetProcessImageFileNameW diff --git a/internal/winapi/thread.go b/internal/winapi/thread.go new file mode 100644 index 0000000000..2a1dac26ac --- /dev/null +++ b/internal/winapi/thread.go @@ -0,0 +1,3 @@ +package winapi + +//sys CreateRemoteThread(process windows.Handle, sa *windows.SecurityAttributes, stackSize uint32, startAddr uintptr, parameter uintptr, creationFlags uint32, threadID *uint32) (handle windows.Handle, err error) = kernel32.CreateRemoteThread diff --git a/internal/winapi/winapi.go b/internal/winapi/winapi.go index 77ea13e3e3..e783d61f66 100644 --- a/internal/winapi/winapi.go +++ b/internal/winapi/winapi.go @@ -2,4 +2,4 @@ // be thought of as an extension to golang.org/x/sys/windows. package winapi -//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go net.go iocp.go jobobject.go path.go logon.go memory.go processor.go devices.go filesystem.go errors.go +//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go net.go path.go thread.go iocp.go jobobject.go logon.go memory.go process.go processor.go devices.go filesystem.go errors.go diff --git a/internal/winapi/zsyscall_windows.go b/internal/winapi/zsyscall_windows.go index 3a54c1fa1b..8dd3d3e9bd 100644 --- a/internal/winapi/zsyscall_windows.go +++ b/internal/winapi/zsyscall_windows.go @@ -41,9 +41,12 @@ var ( modkernel32 = windows.NewLazySystemDLL("kernel32.dll") modntdll = windows.NewLazySystemDLL("ntdll.dll") modadvapi32 = windows.NewLazySystemDLL("advapi32.dll") + modpsapi = windows.NewLazySystemDLL("psapi.dll") modcfgmgr32 = windows.NewLazySystemDLL("cfgmgr32.dll") procSetJobCompartmentId = modiphlpapi.NewProc("SetJobCompartmentId") + procSearchPathW = modkernel32.NewProc("SearchPathW") + procCreateRemoteThread = modkernel32.NewProc("CreateRemoteThread") procGetQueuedCompletionStatus = modkernel32.NewProc("GetQueuedCompletionStatus") procIsProcessInJob = modkernel32.NewProc("IsProcessInJob") procQueryInformationJobObject = modkernel32.NewProc("QueryInformationJobObject") @@ -52,11 +55,12 @@ var ( procQueryIoRateControlInformationJobObject = modkernel32.NewProc("QueryIoRateControlInformationJobObject") procNtOpenJobObject = modntdll.NewProc("NtOpenJobObject") procNtCreateJobObject = modntdll.NewProc("NtCreateJobObject") - procSearchPathW = modkernel32.NewProc("SearchPathW") procLogonUserW = modadvapi32.NewProc("LogonUserW") procRtlMoveMemory = modkernel32.NewProc("RtlMoveMemory") procLocalAlloc = modkernel32.NewProc("LocalAlloc") procLocalFree = modkernel32.NewProc("LocalFree") + procQueryWorkingSet = modpsapi.NewProc("QueryWorkingSet") + procGetProcessImageFileNameW = modkernel32.NewProc("GetProcessImageFileNameW") procGetActiveProcessorCount = modkernel32.NewProc("GetActiveProcessorCount") procCM_Get_Device_ID_List_SizeA = modcfgmgr32.NewProc("CM_Get_Device_ID_List_SizeA") procCM_Get_Device_ID_ListA = modcfgmgr32.NewProc("CM_Get_Device_ID_ListA") @@ -77,6 +81,32 @@ func SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (win32Err return } +func SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath *uint16) (size uint32, err error) { + r0, _, e1 := syscall.Syscall6(procSearchPathW.Addr(), 6, uintptr(unsafe.Pointer(lpPath)), uintptr(unsafe.Pointer(lpFileName)), uintptr(unsafe.Pointer(lpExtension)), uintptr(nBufferLength), uintptr(unsafe.Pointer(lpBuffer)), uintptr(unsafe.Pointer(lpFilePath))) + size = uint32(r0) + if size == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func CreateRemoteThread(process windows.Handle, sa *windows.SecurityAttributes, stackSize uint32, startAddr uintptr, parameter uintptr, creationFlags uint32, threadID *uint32) (handle windows.Handle, err error) { + r0, _, e1 := syscall.Syscall9(procCreateRemoteThread.Addr(), 7, uintptr(process), uintptr(unsafe.Pointer(sa)), uintptr(stackSize), uintptr(startAddr), uintptr(parameter), uintptr(creationFlags), uintptr(unsafe.Pointer(threadID)), 0, 0) + handle = windows.Handle(r0) + if handle == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + func GetQueuedCompletionStatus(cphandle windows.Handle, qty *uint32, key *uintptr, overlapped **windows.Overlapped, timeout uint32) (err error) { r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(cphandle), uintptr(unsafe.Pointer(qty)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(overlapped)), uintptr(timeout), 0) if r1 == 0 { @@ -170,19 +200,6 @@ func NtCreateJobObject(jobHandle *windows.Handle, desiredAccess uint32, objAttri return } -func SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath **uint16) (size uint32, err error) { - r0, _, e1 := syscall.Syscall6(procSearchPathW.Addr(), 6, uintptr(unsafe.Pointer(lpPath)), uintptr(unsafe.Pointer(lpFileName)), uintptr(unsafe.Pointer(lpExtension)), uintptr(nBufferLength), uintptr(unsafe.Pointer(lpBuffer)), uintptr(unsafe.Pointer(lpFilePath))) - size = uint32(r0) - if size == 0 { - if e1 != 0 { - err = errnoErr(e1) - } else { - err = syscall.EINVAL - } - } - return -} - func LogonUser(username *uint16, domain *uint16, password *uint16, logonType uint32, logonProvider uint32, token *windows.Token) (err error) { r1, _, e1 := syscall.Syscall6(procLogonUserW.Addr(), 6, uintptr(unsafe.Pointer(username)), uintptr(unsafe.Pointer(domain)), uintptr(unsafe.Pointer(password)), uintptr(logonType), uintptr(logonProvider), uintptr(unsafe.Pointer(token))) if r1 == 0 { @@ -218,6 +235,31 @@ func LocalFree(ptr uintptr) { return } +func QueryWorkingSet(handle windows.Handle, pv uintptr, cb uint32) (err error) { + r1, _, e1 := syscall.Syscall(procQueryWorkingSet.Addr(), 3, uintptr(handle), uintptr(pv), uintptr(cb)) + if r1 == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func GetProcessImageFileName(hProcess windows.Handle, imageFileName *uint16, nSize uint32) (size uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetProcessImageFileNameW.Addr(), 3, uintptr(hProcess), uintptr(unsafe.Pointer(imageFileName)), uintptr(nSize)) + size = uint32(r0) + if size == 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + func GetActiveProcessorCount(groupNumber uint16) (amount uint32) { r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0) amount = uint32(r0) diff --git a/test/cri-containerd/jobcontainer_test.go b/test/cri-containerd/jobcontainer_test.go new file mode 100644 index 0000000000..59e6dcc3c0 --- /dev/null +++ b/test/cri-containerd/jobcontainer_test.go @@ -0,0 +1,87 @@ +// +build functional + +package cri_containerd + +import ( + "context" + "strings" + "testing" + "time" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" +) + +func getJobContainerPodRequestWCOW(t *testing.T) *runtime.RunPodSandboxRequest { + return &runtime.RunPodSandboxRequest{ + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: t.Name(), + Namespace: testNamespace, + }, + Annotations: map[string]string{ + "microsoft.com/hostprocess-container": "true", + }, + }, + RuntimeHandler: wcowProcessRuntimeHandler, + } +} + +func getJobContainerRequestWCOW(t *testing.T, podID string, podConfig *runtime.PodSandboxConfig) *runtime.CreateContainerRequest { + return &runtime.CreateContainerRequest{ + Config: &runtime.ContainerConfig{ + Metadata: &runtime.ContainerMetadata{ + Name: t.Name() + "-Container", + }, + Image: &runtime.ImageSpec{ + Image: imageWindowsNanoserver, + }, + Command: []string{ + "cmd", + "/c", + "ping", + "-t", + "127.0.0.1", + }, + + Annotations: map[string]string{ + "microsoft.com/hostprocess": "true", + "microsoft.com/hostprocess-inherit-user": "true", + }, + }, + PodSandboxId: podID, + SandboxConfig: podConfig, + } +} + +func Test_RunContainer_InheritUser_JobContainer_WCOW(t *testing.T) { + requireFeatures(t, featureWCOWProcess, featureHostProcess) + + pullRequiredImages(t, []string{imageWindowsNanoserver}) + client := newTestRuntimeClient(t) + + username := "nt authority\\system" + podctx := context.Background() + sandboxRequest := getJobContainerPodRequestWCOW(t) + + podID := runPodSandbox(t, client, podctx, sandboxRequest) + defer removePodSandbox(t, client, podctx, podID) + defer stopPodSandbox(t, client, podctx, podID) + + containerRequest := getJobContainerRequestWCOW(t, podID, sandboxRequest.Config) + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + containerID := createContainer(t, client, ctx, containerRequest) + defer removeContainer(t, client, ctx, containerID) + startContainer(t, client, ctx, containerID) + defer stopContainer(t, client, ctx, containerID) + + execResponse := execSync(t, client, ctx, &runtime.ExecSyncRequest{ + ContainerId: containerID, + Cmd: []string{"whoami"}, + }) + stdout := strings.Trim(string(execResponse.Stdout), " \r\n") + if !strings.Contains(stdout, username) { + t.Fatalf("expected user: '%s', got '%s'", username, stdout) + } +} diff --git a/test/cri-containerd/main.go b/test/cri-containerd/main.go index 08130aca83..6eab0e3153 100644 --- a/test/cri-containerd/main.go +++ b/test/cri-containerd/main.go @@ -75,6 +75,7 @@ const ( featureLCOW = "LCOW" featureWCOWProcess = "WCOWProcess" featureWCOWHypervisor = "WCOWHypervisor" + featureHostProcess = "HostProcess" featureGMSA = "GMSA" featureGPU = "GPU" ) @@ -83,6 +84,7 @@ var allFeatures = []string{ featureLCOW, featureWCOWProcess, featureWCOWHypervisor, + featureHostProcess, featureGMSA, featureGPU, }