From 6321a7ae93db4e3249ae7c64714327e34b2b493f Mon Sep 17 00:00:00 2001
From: Daniel Canter <dcanter@microsoft.com>
Date: Fri, 4 Dec 2020 17:29:35 -0800
Subject: [PATCH] Add job containers package

* Add `JobContainer` and `JobProcess` types as the two types to represent a job container
and a process in a job container.
* Add logic to find the executable being asked to run for a job container.
* Logic to launch the container as specific user.
* Logic to mount the containers scratch space on the host to a directory.
* Small subset of tests added to jobobject package

Signed-off-by: Daniel Canter <dcanter@microsoft.com>
---
 internal/hcsoci/hcsdoc_wcow.go           | 197 +++++-----
 internal/jobcontainers/cpurate_test.go   |  28 ++
 internal/jobcontainers/env.go            |  46 +++
 internal/jobcontainers/jobcontainer.go   | 449 +++++++++++++++++++++++
 internal/jobcontainers/logon.go          |  60 +++
 internal/jobcontainers/oci.go            |  72 ++++
 internal/jobcontainers/path.go           | 228 ++++++++++++
 internal/jobcontainers/path_test.go      |  53 +++
 internal/jobcontainers/process.go        | 233 ++++++++++++
 internal/jobcontainers/storage.go        |  93 +++++
 internal/jobobject/jobobject.go          |  27 +-
 internal/jobobject/jobobject_test.go     | 237 +++++++++++-
 internal/jobobject/limits.go             |  13 +-
 internal/layers/layers.go                |   2 +-
 internal/oci/uvm.go                      |   5 +
 internal/winapi/memory.go                |  16 +
 internal/winapi/path.go                  |   2 +-
 internal/winapi/process.go               |   7 +
 internal/winapi/thread.go                |   3 +
 internal/winapi/winapi.go                |   2 +-
 internal/winapi/zsyscall_windows.go      |  70 +++-
 test/cri-containerd/jobcontainer_test.go |  87 +++++
 test/cri-containerd/main.go              |   2 +
 23 files changed, 1811 insertions(+), 121 deletions(-)
 create mode 100644 internal/jobcontainers/cpurate_test.go
 create mode 100644 internal/jobcontainers/env.go
 create mode 100644 internal/jobcontainers/jobcontainer.go
 create mode 100644 internal/jobcontainers/logon.go
 create mode 100644 internal/jobcontainers/oci.go
 create mode 100644 internal/jobcontainers/path.go
 create mode 100644 internal/jobcontainers/path_test.go
 create mode 100644 internal/jobcontainers/process.go
 create mode 100644 internal/jobcontainers/storage.go
 create mode 100644 internal/winapi/thread.go
 create mode 100644 test/cri-containerd/jobcontainer_test.go

diff --git a/internal/hcsoci/hcsdoc_wcow.go b/internal/hcsoci/hcsdoc_wcow.go
index 09a9150103..439740fe9b 100644
--- a/internal/hcsoci/hcsdoc_wcow.go
+++ b/internal/hcsoci/hcsdoc_wcow.go
@@ -11,7 +11,6 @@ import (
 
 	"github.com/Microsoft/hcsshim/internal/layers"
 	"github.com/Microsoft/hcsshim/internal/log"
-	"github.com/Microsoft/hcsshim/internal/logfields"
 	"github.com/Microsoft/hcsshim/internal/oci"
 	"github.com/Microsoft/hcsshim/internal/processorinfo"
 	"github.com/Microsoft/hcsshim/internal/schema1"
@@ -20,6 +19,7 @@ import (
 	"github.com/Microsoft/hcsshim/internal/uvmfolder"
 	"github.com/Microsoft/hcsshim/internal/wclayer"
 	"github.com/Microsoft/hcsshim/osversion"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
 )
 
@@ -85,6 +85,55 @@ func createMountsConfig(ctx context.Context, coi *createOptionsInternal) (*mount
 	return &config, nil
 }
 
+// ConvertCPULimits handles the logic of converting and validating the containers CPU limits
+// specified in the OCI spec to what HCS expects.
+//
+// `cid` is the container's ID.
+//
+// `vmid` is the Utility VM's ID if the container we're constructing is going to belong to
+// one.
+//
+// `spec` is the OCI spec for the container.
+//
+// `maxCPUCount` is the maximum cpu count allowed for the container. This value should
+// be the number of processors on the host, or in the case of a hypervisor isolated container
+// the number of processors assigned to the guest/Utility VM.
+//
+// Returns the cpu count, cpu limit, and cpu weight in this order. Returns an error if more than one of
+// cpu count, cpu limit, or cpu weight was specified in the OCI spec as they are mutually
+// exclusive.
+func ConvertCPULimits(ctx context.Context, cid string, spec *specs.Spec, maxCPUCount int32) (int32, int32, int32, error) {
+	cpuNumSet := 0
+	cpuCount := oci.ParseAnnotationsCPUCount(ctx, spec, oci.AnnotationContainerProcessorCount, 0)
+	if cpuCount > 0 {
+		cpuNumSet++
+	}
+
+	cpuLimit := oci.ParseAnnotationsCPULimit(ctx, spec, oci.AnnotationContainerProcessorLimit, 0)
+	if cpuLimit > 0 {
+		cpuNumSet++
+	}
+
+	cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, spec, oci.AnnotationContainerProcessorWeight, 0)
+	if cpuWeight > 0 {
+		cpuNumSet++
+	}
+
+	if cpuNumSet > 1 {
+		return 0, 0, 0, fmt.Errorf("invalid spec - Windows Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight)
+	} else if cpuNumSet == 1 {
+		if cpuCount > maxCPUCount {
+			log.G(ctx).WithFields(logrus.Fields{
+				"cid":       cid,
+				"requested": cpuCount,
+				"assigned":  maxCPUCount,
+			}).Warn("Changing user requested CPUCount to current number of processors")
+			cpuCount = maxCPUCount
+		}
+	}
+	return cpuCount, cpuLimit, cpuWeight, nil
+}
+
 // createWindowsContainerDocument creates documents for passing to HCS or GCS to create
 // a container, both hosted and process isolated. It creates both v1 and v2
 // container objects, WCOW only. The containers storage should have been mounted already.
@@ -122,100 +171,74 @@ func createWindowsContainerDocument(ctx context.Context, coi *createOptionsInter
 		v2Container.GuestOs = &hcsschema.GuestOs{HostName: coi.Spec.Hostname}
 	}
 
-	// CPU Resources
-	cpuNumSet := 0
-	cpuCount := oci.ParseAnnotationsCPUCount(ctx, coi.Spec, oci.AnnotationContainerProcessorCount, 0)
-	if cpuCount > 0 {
-		cpuNumSet++
-	}
+	var (
+		uvmCPUCount  int32
+		hostCPUCount = processorinfo.ProcessorCount()
+		maxCPUCount  = hostCPUCount
+	)
 
-	cpuLimit := oci.ParseAnnotationsCPULimit(ctx, coi.Spec, oci.AnnotationContainerProcessorLimit, 0)
-	if cpuLimit > 0 {
-		cpuNumSet++
+	if coi.HostingSystem != nil {
+		uvmCPUCount = coi.HostingSystem.ProcessorCount()
+		maxCPUCount = uvmCPUCount
 	}
 
-	cpuWeight := oci.ParseAnnotationsCPUWeight(ctx, coi.Spec, oci.AnnotationContainerProcessorWeight, 0)
-	if cpuWeight > 0 {
-		cpuNumSet++
+	cpuCount, cpuLimit, cpuWeight, err := ConvertCPULimits(ctx, coi.ID, coi.Spec, maxCPUCount)
+	if err != nil {
+		return nil, nil, err
 	}
 
-	if cpuNumSet > 1 {
-		return nil, nil, fmt.Errorf("invalid spec - Windows Process Container CPU Count: '%d', Limit: '%d', and Weight: '%d' are mutually exclusive", cpuCount, cpuLimit, cpuWeight)
-	} else if cpuNumSet == 1 {
-		hostCPUCount := processorinfo.ProcessorCount()
-		// usableCPUCount is the number of processors present in whatever environment
-		// the container is running in. It will be either the processor count of the
-		// host, or the UVM, based on if the container is process or hypervisor isolated.
-		usableCPUCount := hostCPUCount
-		var uvmCPUCount int32
-		if coi.HostingSystem != nil {
-			uvmCPUCount = coi.HostingSystem.ProcessorCount()
-			usableCPUCount = uvmCPUCount
-		}
-		if cpuCount > usableCPUCount {
-			l := log.G(ctx).WithField(logfields.ContainerID, coi.ID)
-			if coi.HostingSystem != nil {
-				l.Data[logfields.UVMID] = coi.HostingSystem.ID()
-			}
-			l.WithFields(logrus.Fields{
-				"requested": cpuCount,
-				"assigned":  usableCPUCount,
-			}).Warn("Changing user requested CPUCount to current number of processors")
-			cpuCount = usableCPUCount
-		}
-		if coi.ScaleCPULimitsToSandbox && cpuLimit > 0 && coi.HostingSystem != nil {
-			// When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume
-			// the CPU limit has been calculated based on the number of processors on the
-			// host, and instead re-calculate it based on the number of processors in the UVM.
-			//
-			// This is needed to work correctly with assumptions kubelet makes when computing
-			// the CPU limit value:
-			// - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of
-			//   cores. So if 2000 millicores are assigned, the container can use 2 processors.
-			// - In Windows, the job object CPU limit is global across all processors on the
-			//   system, and is represented as a fraction out of 10000. In this model, a limit
-			//   of 10000 means the container can use all processors fully, regardless of how
-			//   many processors exist on the system.
-			// - To convert the millicores value into the job object limit, kubelet divides
-			//   the millicores by the number of CPU cores on the host. This causes problems
-			//   when running inside a UVM, as the UVM may have a different number of processors
-			//   than the host system.
-			//
-			// To work around this, we undo the division by the number of host processors, and
-			// re-do the division based on the number of processors inside the UVM. This will
-			// give the correct value based on the actual number of millicores that the kubelet
-			// wants the container to have.
-			//
-			// Kubelet formula to compute CPU limit:
-			// cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000
-			newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
-			// We only apply bounds here because we are calculating the CPU limit ourselves,
-			// and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000].
-			// In the case where we use the value directly from the user, we don't alter it to fit
-			// within the bounds, but just let the platform throw an error if it is invalid.
-			if newCPULimit < 1 {
-				newCPULimit = 1
-			} else if newCPULimit > 10000 {
-				newCPULimit = 10000
-			}
-			log.G(ctx).WithFields(logrus.Fields{
-				"hostCPUCount": hostCPUCount,
-				"uvmCPUCount":  uvmCPUCount,
-				"oldCPULimit":  cpuLimit,
-				"newCPULimit":  newCPULimit,
-			}).Info("rescaling CPU limit for UVM sandbox")
-			cpuLimit = newCPULimit
+	if coi.HostingSystem != nil && coi.ScaleCPULimitsToSandbox && cpuLimit > 0 {
+		// When ScaleCPULimitsToSandbox is set and we are running in a UVM, we assume
+		// the CPU limit has been calculated based on the number of processors on the
+		// host, and instead re-calculate it based on the number of processors in the UVM.
+		//
+		// This is needed to work correctly with assumptions kubelet makes when computing
+		// the CPU limit value:
+		// - kubelet thinks about CPU limits in terms of millicores, which are 1000ths of
+		//   cores. So if 2000 millicores are assigned, the container can use 2 processors.
+		// - In Windows, the job object CPU limit is global across all processors on the
+		//   system, and is represented as a fraction out of 10000. In this model, a limit
+		//   of 10000 means the container can use all processors fully, regardless of how
+		//   many processors exist on the system.
+		// - To convert the millicores value into the job object limit, kubelet divides
+		//   the millicores by the number of CPU cores on the host. This causes problems
+		//   when running inside a UVM, as the UVM may have a different number of processors
+		//   than the host system.
+		//
+		// To work around this, we undo the division by the number of host processors, and
+		// re-do the division based on the number of processors inside the UVM. This will
+		// give the correct value based on the actual number of millicores that the kubelet
+		// wants the container to have.
+		//
+		// Kubelet formula to compute CPU limit:
+		// cpuMaximum := 10000 * cpuLimit.MilliValue() / int64(runtime.NumCPU()) / 1000
+		newCPULimit := cpuLimit * hostCPUCount / uvmCPUCount
+		// We only apply bounds here because we are calculating the CPU limit ourselves,
+		// and this matches the kubelet behavior where they also bound the CPU limit by [1, 10000].
+		// In the case where we use the value directly from the user, we don't alter it to fit
+		// within the bounds, but just let the platform throw an error if it is invalid.
+		if newCPULimit < 1 {
+			newCPULimit = 1
+		} else if newCPULimit > 10000 {
+			newCPULimit = 10000
 		}
+		log.G(ctx).WithFields(logrus.Fields{
+			"hostCPUCount": hostCPUCount,
+			"uvmCPUCount":  uvmCPUCount,
+			"oldCPULimit":  cpuLimit,
+			"newCPULimit":  newCPULimit,
+		}).Info("rescaling CPU limit for UVM sandbox")
+		cpuLimit = newCPULimit
+	}
 
-		v1.ProcessorCount = uint32(cpuCount)
-		v1.ProcessorMaximum = int64(cpuLimit)
-		v1.ProcessorWeight = uint64(cpuWeight)
+	v1.ProcessorCount = uint32(cpuCount)
+	v1.ProcessorMaximum = int64(cpuLimit)
+	v1.ProcessorWeight = uint64(cpuWeight)
 
-		v2Container.Processor = &hcsschema.Processor{
-			Count:   cpuCount,
-			Maximum: cpuLimit,
-			Weight:  cpuWeight,
-		}
+	v2Container.Processor = &hcsschema.Processor{
+		Count:   cpuCount,
+		Maximum: cpuLimit,
+		Weight:  cpuWeight,
 	}
 
 	// Memory Resources
diff --git a/internal/jobcontainers/cpurate_test.go b/internal/jobcontainers/cpurate_test.go
new file mode 100644
index 0000000000..486b42f783
--- /dev/null
+++ b/internal/jobcontainers/cpurate_test.go
@@ -0,0 +1,28 @@
+package jobcontainers
+
+import (
+	"testing"
+)
+
+func assertEqual(t *testing.T, a uint32, b uint32) {
+	if a != b {
+		t.Fatalf("%d != %d", a, b)
+	}
+}
+
+func TestJobCPURate(t *testing.T) {
+	rate := calculateJobCPURate(10, 1)
+	assertEqual(t, rate, 1000)
+
+	rate = calculateJobCPURate(10, 5)
+	assertEqual(t, rate, 5000)
+
+	rate = calculateJobCPURate(20, 5)
+	assertEqual(t, rate, 2500)
+
+	rate = calculateJobCPURate(1, 1)
+	assertEqual(t, rate, 10000)
+
+	rate = calculateJobCPURate(1, 0)
+	assertEqual(t, rate, 1)
+}
diff --git a/internal/jobcontainers/env.go b/internal/jobcontainers/env.go
new file mode 100644
index 0000000000..ed1017fbf2
--- /dev/null
+++ b/internal/jobcontainers/env.go
@@ -0,0 +1,46 @@
+package jobcontainers
+
+import (
+	"unicode/utf16"
+	"unsafe"
+
+	"github.com/pkg/errors"
+	"golang.org/x/sys/windows"
+)
+
+// defaultEnvBlock will return a new environment block in the context of the user token
+// `token`.
+//
+// This is almost a direct copy of the go stdlib implementation with some slight changes
+// to force a valid token to be passed.
+// https://github.com/golang/go/blob/f21be2fdc6f1becdbed1592ea0b245cdeedc5ac8/src/internal/syscall/execenv/execenv_windows.go#L24
+func defaultEnvBlock(token windows.Token) (env []string, err error) {
+	if token == 0 {
+		return nil, errors.New("invalid token for creating environment block")
+	}
+
+	var block *uint16
+	if err := windows.CreateEnvironmentBlock(&block, token, false); err != nil {
+		return nil, err
+	}
+	defer windows.DestroyEnvironmentBlock(block)
+
+	blockp := uintptr(unsafe.Pointer(block))
+	for {
+		// find NUL terminator
+		end := unsafe.Pointer(blockp)
+		for *(*uint16)(end) != 0 {
+			end = unsafe.Pointer(uintptr(end) + 2)
+		}
+
+		n := (uintptr(end) - uintptr(unsafe.Pointer(blockp))) / 2
+		if n == 0 {
+			// environment block ends with empty string
+			break
+		}
+		entry := (*[(1 << 30) - 1]uint16)(unsafe.Pointer(blockp))[:n:n]
+		env = append(env, string(utf16.Decode(entry)))
+		blockp += 2 * (uintptr(len(entry)) + 1)
+	}
+	return
+}
diff --git a/internal/jobcontainers/jobcontainer.go b/internal/jobcontainers/jobcontainer.go
new file mode 100644
index 0000000000..c4f589357b
--- /dev/null
+++ b/internal/jobcontainers/jobcontainer.go
@@ -0,0 +1,449 @@
+package jobcontainers
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"regexp"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/Microsoft/go-winio/pkg/guid"
+	"github.com/Microsoft/hcsshim/internal/cow"
+	"github.com/Microsoft/hcsshim/internal/hcs"
+	"github.com/Microsoft/hcsshim/internal/jobobject"
+	"github.com/Microsoft/hcsshim/internal/layers"
+	"github.com/Microsoft/hcsshim/internal/log"
+	"github.com/Microsoft/hcsshim/internal/queue"
+	"github.com/Microsoft/hcsshim/internal/schema1"
+	hcsschema "github.com/Microsoft/hcsshim/internal/schema2"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/windows"
+)
+
+// Split arguments but ignore spaces in quotes.
+//
+// For example instead of:
+// "\"Hello good\" morning world" --> ["\"Hello", "good\"", "morning", "world"]
+// we get ["\"Hello good\"", "morning", "world"]
+func splitArgs(cmdLine string) []string {
+	r := regexp.MustCompile(`[^\s"]+|"([^"]*)"`)
+	return r.FindAllString(cmdLine, -1)
+}
+
+// Convert environment map to a slice of environment variables in the form [Key1=val1, key2=val2]
+func envMapToSlice(m map[string]string) []string {
+	var s []string
+	for k, v := range m {
+		s = append(s, k+"="+v)
+	}
+	return s
+}
+
+const (
+	jobContainerNameFmt = "JobContainer_%s"
+	// Environment variable set in every process in the job detailing where the containers volume
+	// is mounted on the host.
+	sandboxMountPointEnvVar = "CONTAINER_SANDBOX_MOUNT_POINT"
+)
+
+type initProc struct {
+	initDoOnce sync.Once
+	proc       *JobProcess
+	initBlock  chan struct{}
+}
+
+// JobContainer represents a lightweight container composed from a job object.
+type JobContainer struct {
+	id             string
+	spec           *specs.Spec          // OCI spec used to create the container
+	job            *jobobject.JobObject // Object representing the job object the container owns
+	sandboxMount   string               // Path to where the sandbox is mounted on the host
+	m              sync.Mutex
+	closedWaitOnce sync.Once
+	init           initProc
+	startTimestamp time.Time
+	exited         chan struct{}
+	waitBlock      chan struct{}
+	waitError      error
+}
+
+var _ cow.ProcessHost = &JobContainer{}
+var _ cow.Container = &JobContainer{}
+
+func newJobContainer(id string, s *specs.Spec) *JobContainer {
+	return &JobContainer{
+		id:        id,
+		spec:      s,
+		waitBlock: make(chan struct{}),
+		exited:    make(chan struct{}),
+		init:      initProc{initBlock: make(chan struct{})},
+	}
+}
+
+// Create creates a new JobContainer from `s`.
+func Create(ctx context.Context, id string, s *specs.Spec) (_ cow.Container, err error) {
+	log.G(ctx).WithField("id", id).Debug("Creating job container")
+
+	if s == nil {
+		return nil, errors.New("Spec must be supplied")
+	}
+
+	if id == "" {
+		g, err := guid.NewV4()
+		if err != nil {
+			return nil, err
+		}
+		id = g.String()
+	}
+
+	if err := mountLayers(ctx, s); err != nil {
+		return nil, errors.Wrap(err, "failed to mount container layers")
+	}
+
+	volumeGUIDRegex := `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}(|\\)$`
+	if matched, err := regexp.MatchString(volumeGUIDRegex, s.Root.Path); !matched || err != nil {
+		return nil, fmt.Errorf(`invalid container spec - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, s.Root.Path)
+	}
+	if s.Root.Path[len(s.Root.Path)-1] != '\\' {
+		s.Root.Path += `\` // Be nice to clients and make sure well-formed for back-compat
+	}
+
+	container := newJobContainer(id, s)
+
+	// Create the job object all processes will run in.
+	options := &jobobject.Options{
+		Name:          fmt.Sprintf(jobContainerNameFmt, id),
+		Notifications: true,
+	}
+	job, err := jobobject.Create(ctx, options)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to create job object")
+	}
+
+	// Parity with how we handle process isolated containers. We set the same flag which
+	// behaves the same way for a silo.
+	if err := job.SetTerminateOnLastHandleClose(); err != nil {
+		return nil, errors.Wrap(err, "failed to set terminate on last handle close on job container")
+	}
+	container.job = job
+
+	var path string
+	defer func() {
+		if err != nil {
+			container.Close()
+			if path != "" {
+				removeSandboxMountPoint(ctx, path)
+			}
+		}
+	}()
+
+	limits, err := specToLimits(ctx, id, s)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to convert OCI spec to job object limits")
+	}
+
+	// Set resource limits on the job object based off of oci spec.
+	if err := job.SetResourceLimits(limits); err != nil {
+		return nil, errors.Wrap(err, "failed to set resource limits")
+	}
+
+	// Setup directory sandbox volume will be mounted
+	sandboxPath := fmt.Sprintf(sandboxMountFormat, id)
+	if _, err := os.Stat(sandboxPath); os.IsNotExist(err) {
+		if err := os.MkdirAll(sandboxPath, 0777); err != nil {
+			return nil, errors.Wrap(err, "failed to create mounted folder")
+		}
+	}
+	path = sandboxPath
+
+	if err := mountSandboxVolume(ctx, path, s.Root.Path); err != nil {
+		return nil, errors.Wrap(err, "failed to bind payload directory on host")
+	}
+
+	container.sandboxMount = path
+	go container.waitBackground(ctx)
+	return container, nil
+}
+
+// CreateProcess creates a process on the host, starts it, adds it to the containers
+// job object and then waits for exit.
+func (c *JobContainer) CreateProcess(ctx context.Context, config interface{}) (_ cow.Process, err error) {
+	conf, ok := config.(*hcsschema.ProcessParameters)
+	if !ok {
+		return nil, errors.New("unsupported process config passed in")
+	}
+
+	if conf.EmulateConsole {
+		return nil, errors.New("console emulation not supported for job containers")
+	}
+
+	absPath, commandLine, err := getApplicationName(conf.CommandLine, c.sandboxMount, os.Getenv("PATH"))
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to get application name from commandline %q", conf.CommandLine)
+	}
+
+	commandLine = strings.ReplaceAll(commandLine, "%"+sandboxMountPointEnvVar+"%", c.sandboxMount)
+	commandLine = strings.ReplaceAll(commandLine, "$env:"+sandboxMountPointEnvVar, c.sandboxMount)
+
+	var token windows.Token
+	if getUserTokenInheritAnnotation(c.spec.Annotations) {
+		token, err = openCurrentProcessToken()
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		token, err = processToken(conf.User)
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to create user process token")
+		}
+	}
+	defer token.Close()
+
+	env, err := defaultEnvBlock(token)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to get default environment block")
+	}
+	env = append(env, envMapToSlice(conf.Environment)...)
+	env = append(env, sandboxMountPointEnvVar+"="+c.sandboxMount)
+
+	cmd := &exec.Cmd{
+		Env:  env,
+		Dir:  c.sandboxMount,
+		Path: absPath,
+		Args: splitArgs(commandLine),
+		SysProcAttr: &syscall.SysProcAttr{
+			CreationFlags: windows.CREATE_NEW_PROCESS_GROUP,
+			Token:         syscall.Token(token),
+		},
+	}
+	process := newProcess(cmd)
+
+	// Create process pipes if asked for.
+	if conf.CreateStdInPipe {
+		stdin, err := process.cmd.StdinPipe()
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to create stdin pipe")
+		}
+		process.stdin = stdin
+	}
+
+	if conf.CreateStdOutPipe {
+		stdout, err := process.cmd.StdoutPipe()
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to create stdout pipe")
+		}
+		process.stdout = stdout
+	}
+
+	if conf.CreateStdErrPipe {
+		stderr, err := process.cmd.StderrPipe()
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to create stderr pipe")
+		}
+		process.stderr = stderr
+	}
+
+	defer func() {
+		if err != nil {
+			process.Close()
+		}
+	}()
+
+	if err = process.Start(); err != nil {
+		return nil, errors.Wrap(err, "failed to start host process")
+	}
+
+	if err = c.job.Assign(uint32(process.Pid())); err != nil {
+		return nil, errors.Wrap(err, "failed to assign process to job object")
+	}
+
+	// Assign the first process made as the init process of the container.
+	c.init.initDoOnce.Do(func() {
+		c.init.proc = process
+		close(c.init.initBlock)
+	})
+
+	// Wait for process exit
+	go c.pollJobMsgs(ctx)
+	go process.waitBackground(ctx)
+	return process, nil
+}
+
+func (c *JobContainer) Modify(ctx context.Context, config interface{}) (err error) {
+	return errors.New("modify not supported for job containers")
+}
+
+// Release unmounts all of the container layers. Safe to call multiple times, if no storage
+// is mounted this call will just return nil.
+func (c *JobContainer) Release(ctx context.Context) error {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	log.G(ctx).WithFields(logrus.Fields{
+		"id":   c.id,
+		"path": c.sandboxMount,
+	}).Warn("removing sandbox volume mount")
+
+	if c.sandboxMount != "" {
+		if err := removeSandboxMountPoint(ctx, c.sandboxMount); err != nil {
+			return errors.Wrap(err, "failed to remove sandbox volume mount path")
+		}
+		if err := layers.UnmountContainerLayers(ctx, c.spec.Windows.LayerFolders, "", nil, layers.UnmountOperationAll); err != nil {
+			return errors.Wrap(err, "failed to unmount container layers")
+		}
+		c.sandboxMount = ""
+	}
+	return nil
+}
+
+// Start starts the container. There's nothing to "start" for job containers, so this just
+// sets the start timestamp.
+func (c *JobContainer) Start(ctx context.Context) error {
+	c.startTimestamp = time.Now()
+	return nil
+}
+
+// Close closes any open handles.
+func (c *JobContainer) Close() error {
+	if err := c.job.Close(); err != nil {
+		return err
+	}
+	c.closedWaitOnce.Do(func() {
+		c.waitError = hcs.ErrAlreadyClosed
+		close(c.waitBlock)
+	})
+	return nil
+}
+
+// ID returns the ID of the container. This is the name used to create the job object.
+func (c *JobContainer) ID() string {
+	return c.id
+}
+
+// Shutdown gracefully shuts down the container.
+func (c *JobContainer) Shutdown(ctx context.Context) error {
+	log.G(ctx).WithField("id", c.id).Debug("shutting down job container")
+
+	ctx, cancel := context.WithTimeout(ctx, time.Second*5)
+	defer cancel()
+	return c.shutdown(ctx)
+}
+
+// shutdown will loop through all the pids in the container and send a signal to exit.
+// If there are no processes in the container it will early return nil.
+// If the all processes exited message is not received within the context timeout set, it will
+// terminate the job.
+func (c *JobContainer) shutdown(ctx context.Context) error {
+	pids, err := c.job.Pids()
+	if err != nil {
+		return errors.Wrap(err, "failed to get pids in container")
+	}
+
+	if len(pids) == 0 {
+		return nil
+	}
+
+	for _, pid := range pids {
+		// If any process can't be signaled just wait until the timeout hits
+		if err := signalProcess(pid, windows.CTRL_SHUTDOWN_EVENT); err != nil {
+			log.G(ctx).WithField("pid", pid).Error("failed to signal process in job container")
+		}
+	}
+
+	select {
+	case <-c.exited:
+	case <-ctx.Done():
+		return c.Terminate(ctx)
+	}
+	return nil
+}
+
+// PropertiesV2 returns properties relating to the job container. This is an HCS construct but
+// to adhere to the interface for containers on Windows it is partially implemented. The only
+// supported property is schema2.PTStatistics.
+func (c *JobContainer) PropertiesV2(ctx context.Context, types ...hcsschema.PropertyType) (*hcsschema.Properties, error) {
+	return nil, errors.New("`PropertiesV2` call is not implemented for job containers")
+}
+
+// Properties is not implemented for job containers. This is just to satisfy the cow.Container interface.
+func (c *JobContainer) Properties(ctx context.Context, types ...schema1.PropertyType) (*schema1.ContainerProperties, error) {
+	return nil, errors.New("`Properties` call is not implemented for job containers")
+}
+
+// Terminate terminates the job object (kills every process in the job).
+func (c *JobContainer) Terminate(ctx context.Context) error {
+	log.G(ctx).WithField("id", c.id).Debug("terminating job container")
+
+	if err := c.job.Terminate(1); err != nil {
+		return errors.Wrap(err, "failed to terminate job container")
+	}
+	return nil
+}
+
+// Wait synchronously waits for the container to shutdown or terminate. If
+// the container has already exited returns the previous error (if any).
+func (c *JobContainer) Wait() error {
+	<-c.waitBlock
+	return c.waitError
+}
+
+func (c *JobContainer) waitBackground(ctx context.Context) {
+	// Wait for there to be an init process assigned.
+	<-c.init.initBlock
+
+	// Once the init process finishes, if there's any other processes in the container we need to signal
+	// them to exit.
+	<-c.init.proc.waitBlock
+
+	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
+	defer cancel()
+	if err := c.Shutdown(ctx); err != nil {
+		c.Terminate(ctx)
+	}
+
+	c.closedWaitOnce.Do(func() {
+		c.waitError = c.init.proc.waitError
+		close(c.waitBlock)
+	})
+}
+
+// Polls for notifications from the job objects assigned IO completion port.
+func (c *JobContainer) pollJobMsgs(ctx context.Context) {
+	for {
+		notif, err := c.job.PollNotification()
+		if err != nil {
+			// Queues closed or we somehow aren't registered to receive notifications. There won't be
+			// any notifications arriving so we're safe to return.
+			if err == queue.ErrQueueClosed || err == jobobject.ErrNotRegistered {
+				return
+			}
+			log.G(ctx).WithError(err).Warn("error while polling for job container notification")
+		}
+
+		switch msg := notif.(type) {
+		// All processes have exited. Close the waitblock so we can cleanup and then return.
+		case jobobject.MsgAllProcessesExited:
+			close(c.exited)
+			return
+		case jobobject.MsgUnimplemented:
+		default:
+			log.G(ctx).WithField("message", msg).Warn("unknown job object notification encountered")
+		}
+	}
+}
+
+// IsOCI - Just to satisfy the cow.ProcessHost interface. Follow the WCOW behavior
+func (c *JobContainer) IsOCI() bool {
+	return false
+}
+
+// OS returns the operating system name as a string. This should always be windows.
+func (c *JobContainer) OS() string {
+	return "windows"
+}
diff --git a/internal/jobcontainers/logon.go b/internal/jobcontainers/logon.go
new file mode 100644
index 0000000000..01ac968c04
--- /dev/null
+++ b/internal/jobcontainers/logon.go
@@ -0,0 +1,60 @@
+package jobcontainers
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/Microsoft/hcsshim/internal/winapi"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/windows"
+)
+
+// processToken returns a user token for the user specified by `user`. This should be in the form
+// of either a DOMAIN\username or just username.
+func processToken(user string) (windows.Token, error) {
+	var (
+		domain   string
+		userName string
+		token    windows.Token
+	)
+
+	split := strings.Split(user, "\\")
+	if len(split) == 2 {
+		domain = split[0]
+		userName = split[1]
+	} else if len(split) == 1 {
+		userName = split[0]
+	} else {
+		return 0, fmt.Errorf("invalid user string `%s`", user)
+	}
+
+	if user == "" {
+		return 0, errors.New("empty user string passed")
+	}
+
+	logonType := winapi.LOGON32_LOGON_INTERACTIVE
+	// User asking to run as a local system account (NETWORK SERVICE, LOCAL SERVICE, SYSTEM)
+	if domain == "NT AUTHORITY" {
+		logonType = winapi.LOGON32_LOGON_SERVICE
+	}
+
+	if err := winapi.LogonUser(
+		windows.StringToUTF16Ptr(userName),
+		windows.StringToUTF16Ptr(domain),
+		nil,
+		logonType,
+		winapi.LOGON32_PROVIDER_DEFAULT,
+		&token,
+	); err != nil {
+		return 0, errors.Wrap(err, "failed to logon user")
+	}
+	return token, nil
+}
+
+func openCurrentProcessToken() (windows.Token, error) {
+	var token windows.Token
+	if err := windows.OpenProcessToken(windows.CurrentProcess(), windows.TOKEN_ALL_ACCESS, &token); err != nil {
+		return 0, errors.Wrap(err, "failed to open current process token")
+	}
+	return token, nil
+}
diff --git a/internal/jobcontainers/oci.go b/internal/jobcontainers/oci.go
new file mode 100644
index 0000000000..d78adfe43a
--- /dev/null
+++ b/internal/jobcontainers/oci.go
@@ -0,0 +1,72 @@
+package jobcontainers
+
+import (
+	"context"
+
+	"github.com/Microsoft/hcsshim/internal/hcsoci"
+	"github.com/Microsoft/hcsshim/internal/processorinfo"
+
+	"github.com/Microsoft/hcsshim/internal/jobobject"
+
+	"github.com/Microsoft/hcsshim/internal/oci"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// This file contains helpers for converting parts of the oci spec to useful
+// structures/limits to be applied to a job object.
+func calculateJobCPUWeight(processorWeight uint32) uint32 {
+	if processorWeight == 0 {
+		return 0
+	}
+	return 1 + uint32((8*processorWeight)/jobobject.CPUWeightMax)
+}
+
+func calculateJobCPURate(hostProcs uint32, processorCount uint32) uint32 {
+	rate := (processorCount * 10000) / hostProcs
+	if rate == 0 {
+		return 1
+	}
+	return rate
+}
+
+func getUserTokenInheritAnnotation(annotations map[string]string) bool {
+	val, ok := annotations[oci.AnnotationHostProcessInheritUser]
+	return ok && val == "true"
+}
+
+// Oci spec to job object limit information. Will do any conversions to job object specific values from
+// their respective OCI representations. E.g. we convert CPU count into the correct job object cpu
+// rate value internally.
+func specToLimits(ctx context.Context, cid string, s *specs.Spec) (*jobobject.JobLimits, error) {
+	hostCPUCount := processorinfo.ProcessorCount()
+	cpuCount, cpuLimit, cpuWeight, err := hcsoci.ConvertCPULimits(ctx, cid, s, hostCPUCount)
+	if err != nil {
+		return nil, err
+	}
+
+	realCPULimit, realCPUWeight := uint32(cpuLimit), uint32(cpuWeight)
+	if cpuCount != 0 {
+		// Job object API does not support "CPU count". Instead, we translate the notion of "count" into
+		// CPU limit, which represents the amount of the host system's processors that the job can use to
+		// a percentage times 100. For example, to let the job use 20% of the available LPs the rate would
+		// be 20 times 100, or 2,000.
+		realCPULimit = calculateJobCPURate(uint32(hostCPUCount), uint32(cpuCount))
+	} else if cpuWeight != 0 {
+		realCPUWeight = calculateJobCPUWeight(realCPUWeight)
+	}
+
+	// Memory limit
+	memLimitMB := oci.ParseAnnotationsMemory(ctx, s, oci.AnnotationContainerMemorySizeInMB, 0)
+
+	// IO limits
+	maxBandwidth := int64(oci.ParseAnnotationsStorageBps(ctx, s, oci.AnnotationContainerStorageQoSBandwidthMaximum, 0))
+	maxIops := int64(oci.ParseAnnotationsStorageIops(ctx, s, oci.AnnotationContainerStorageQoSIopsMaximum, 0))
+
+	return &jobobject.JobLimits{
+		CPULimit:           realCPULimit,
+		CPUWeight:          realCPUWeight,
+		MaxIOPS:            maxIops,
+		MaxBandwidth:       maxBandwidth,
+		MemoryLimitInBytes: memLimitMB * 1024 * 1024,
+	}, nil
+}
diff --git a/internal/jobcontainers/path.go b/internal/jobcontainers/path.go
new file mode 100644
index 0000000000..ba68535c3d
--- /dev/null
+++ b/internal/jobcontainers/path.go
@@ -0,0 +1,228 @@
+package jobcontainers
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/Microsoft/hcsshim/internal/winapi"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/windows"
+)
+
+// This file emulates the path resolution logic that is used for launching regular
+// process and hypervisor isolated Windows containers.
+
+// getApplicationName resolves a given command line string and returns the path to the executable that should be launched, and
+// an adjusted commandline if needed. The resolution logic may appear overcomplicated but is designed to match the logic used by
+// standard Windows containers, as well as that used by CreateProcess (see notes for the lpApplicationName parameter).
+//
+// The logic follows this set of steps:
+// - Construct a list of searchable paths to find the application. This includes the standard Windows system paths
+// which are generally located at C:\Windows, C:\Windows\System32 and C:\Windows\System. If a working directory or path is specified
+// via the `workingDirectory` or `pathEnv` parameters then these will be appended to the paths to search from as well. The
+// searching logic is handled by the Windows API function `SearchPathW` which accepts a semicolon separated list of paths to search
+// in.
+// https://docs.microsoft.com/en-us/windows/win32/api/processenv/nf-processenv-searchpathw
+//
+// - If the commandline is quoted, simply grab whatever is in the quotes and search for this directly.
+// We don't try any other logic here, if the application can't be found from the quoted contents we return an error.
+//
+// - If the commandline is not quoted, we iterate over each possible application name by splitting the arguments and iterating
+// over them one by one while appending the last search each time until we either find a match or don't and return
+// an error. If we don't find the application on the first try, this means that the application name has a space in it
+// and we must adjust the commandline to add quotes around the application name.
+//
+// - If the application is found, we return the fullpath to the executable and the adjusted commandline (if needed).
+//
+// Examples:
+// - Input: "C:\Program Files\sub dir\program name"
+//  Search order:
+//  - C:\Program.exe
+//  - C:\Program Files\sub.exe
+//  - C:\Program Files\sub dir\program.exe
+//  - C:\Program Files\sub dir\program name.exe
+//  Returned commandline: "\"C:\Program Files\sub dir\program name\""
+//
+// - Input: "\"program name\""
+//  Search order:
+//  - program name.exe
+//  Returned commandline: "\"program name\"
+//
+// - Input: "\"program name\" -flags -for -program"
+//  Search order:
+//  - program.exe
+//  - program name.exe
+//  Returned commandline: "\"program name\" -flags -for -program"
+//
+// - Input: "\"C:\path\to\program name\""
+//  Search Order:
+//  - "C:\path\to\program name.exe"
+//  Returned commandline: "\"C:\path\to\program name""
+//
+// - Input: "C:\path\to\program"
+//  Search Order:
+//  - "C:\path\to\program.exe"
+//  Returned commandline: "C:\path\to\program"
+//
+// CreateProcess documentation: https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessa
+func getApplicationName(commandLine, workingDirectory, pathEnv string) (string, string, error) {
+	var (
+		searchPath string
+		result     string
+	)
+	// First we get the system paths concatenated with semicolons (C:\windows;C:\windows\system32;C:\windows\system;)
+	// and use this as the basis for the directories to search for the application.
+	systemPaths, err := getSystemPaths()
+	if err != nil {
+		return "", "", err
+	}
+
+	// If there's a working directory we should also add this to the list of directories to search.
+	if workingDirectory != "" {
+		searchPath += workingDirectory + ";"
+	}
+
+	// Append the path environment to the list of directories to search.
+	if pathEnv != "" {
+		searchPath += pathEnv + ";"
+	}
+	searchPath += systemPaths
+
+	if searchPath[len(searchPath)-1] == ';' {
+		searchPath = searchPath[:len(searchPath)-1]
+	}
+
+	// Application name was quoted, just search directly.
+	//
+	// For example given the commandline: "hello goodbye" -foo -bar -baz
+	// we would search for the executable 'hello goodbye.exe'
+	if commandLine != "" && commandLine[0] == '"' {
+		index := strings.Index(commandLine[1:], "\"")
+		if index == -1 {
+			return "", "", errors.New("no ending quotation mark found in command")
+		}
+		path, err := searchPathForExe(commandLine[1:index+1], searchPath)
+		if err != nil {
+			return "", "", err
+		}
+		return path, commandLine, nil
+	}
+
+	// Application name wasn't quoted, try each possible application name.
+	// For example given the commandline: hello goodbye, we would first try
+	// to find 'hello.exe' and then 'hello goodbye.exe'
+	var (
+		trialName    string
+		quoteCmdLine bool
+		argsIndex    int
+	)
+	args := splitArgs(commandLine)
+
+	// Loop through each element of the commandline and try and determine if any of them are executables.
+	//
+	// For example given the commandline: foo bar baz
+	// if foo.exe is successfully found we will stop and return with the full path to 'foo.exe'. If foo doesn't succeed we
+	// then try 'foo bar.exe' and 'foo bar baz.exe'.
+	for argsIndex < len(args) {
+		trialName += args[argsIndex]
+		fullPath, err := searchPathForExe(trialName, searchPath)
+		if err == nil {
+			result = fullPath
+			break
+		}
+		trialName += " "
+		quoteCmdLine = true
+		argsIndex++
+	}
+
+	// If we searched through every argument and didn't find an executable, we need to error out.
+	if argsIndex == len(args) {
+		return "", "", fmt.Errorf("failed to find executable %q", commandLine)
+	}
+
+	// If we found an executable but after we concatenated two arguments together,
+	// we need to adjust the commandline to be quoted.
+	//
+	// For example given the commandline: foo bar
+	// if 'foo bar.exe' is found, we need to adjust the commandline to
+	// be quoted as this is what the platform expects (CreateProcess call).
+	adjustedCommandLine := commandLine
+	if quoteCmdLine {
+		trialName = "\"" + trialName + "\""
+		trialName += " " + strings.Join(args[argsIndex+1:], " ")
+		adjustedCommandLine = trialName
+	}
+
+	return result, adjustedCommandLine, nil
+}
+
+// searchPathForExe calls the Windows API function `SearchPathW` to try and locate
+// `fileName` by searching in `pathsToSearch`. `pathsToSearch` is generally a semicolon
+// seperated string of paths to search that `SearchPathW` will iterate through one by one.
+// If the path resolved for `fileName` ends up being a directory, this function will return an
+// error.
+func searchPathForExe(fileName, pathsToSearch string) (string, error) {
+	fileNamePtr, err := windows.UTF16PtrFromString(fileName)
+	if err != nil {
+		return "", err
+	}
+
+	pathsToSearchPtr, err := windows.UTF16PtrFromString(pathsToSearch)
+	if err != nil {
+		return "", err
+	}
+
+	extension, err := windows.UTF16PtrFromString(".exe")
+	if err != nil {
+		return "", err
+	}
+
+	path := make([]uint16, windows.MAX_PATH)
+	_, err = winapi.SearchPath(
+		pathsToSearchPtr,
+		fileNamePtr,
+		extension,
+		windows.MAX_PATH,
+		&path[0],
+		nil,
+	)
+	if err != nil {
+		return "", err
+	}
+
+	exePath := windows.UTF16PtrToString(&path[0])
+	// Need to check if we just found a directory with the name of the executable and
+	// .exe at the end. ping.exe is a perfectly valid directory name for example.
+	attrs, err := os.Stat(exePath)
+	if err != nil {
+		return "", err
+	}
+
+	if attrs.IsDir() {
+		return "", fmt.Errorf("found directory instead of executable %q", exePath)
+	}
+
+	return exePath, nil
+}
+
+// Returns the system paths (system32, system, and windows) as a search path,
+// including a terminating ;.
+//
+// Typical output would be `C:\WINDOWS\system32;C:\WINDOWS\System;C:\WINDOWS;`
+func getSystemPaths() (string, error) {
+	var searchPath string
+	systemDir, err := windows.GetSystemDirectory()
+	if err != nil {
+		return "", errors.Wrap(err, "failed to get system directory")
+	}
+	searchPath += systemDir + ";"
+
+	windowsDir, err := windows.GetWindowsDirectory()
+	if err != nil {
+		return "", errors.Wrap(err, "failed to get Windows directory")
+	}
+
+	searchPath += windowsDir + "\\System;" + windowsDir + ";"
+	return searchPath, nil
+}
diff --git a/internal/jobcontainers/path_test.go b/internal/jobcontainers/path_test.go
new file mode 100644
index 0000000000..9f631cff31
--- /dev/null
+++ b/internal/jobcontainers/path_test.go
@@ -0,0 +1,53 @@
+package jobcontainers
+
+import (
+	"os"
+	"os/exec"
+	"testing"
+)
+
+func TestSearchPath(t *testing.T) {
+	// Testing that relative paths work.
+	_, err := searchPathForExe("windows\\system32\\ping", "C:\\")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = searchPathForExe("system32\\ping", "C:\\windows")
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestGetApplicationName(t *testing.T) {
+	cwd, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, _, err = getApplicationName("ping", cwd, os.Getenv("PATH"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Test that we only find the first element of the commandline if the binary exists.
+	_, _, err = getApplicationName("ping test", cwd, os.Getenv("PATH"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Test quoted application name with an argument afterwards.
+	path, cmdLine, err := getApplicationName("\"ping\" 127.0.0.1", cwd, os.Getenv("PATH"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	args := splitArgs(cmdLine)
+	cmd := &exec.Cmd{
+		Path: path,
+		Args: args,
+	}
+	if err := cmd.Run(); err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/internal/jobcontainers/process.go b/internal/jobcontainers/process.go
new file mode 100644
index 0000000000..8d8e9e0f3b
--- /dev/null
+++ b/internal/jobcontainers/process.go
@@ -0,0 +1,233 @@
+package jobcontainers
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os/exec"
+	"sync"
+
+	"github.com/Microsoft/hcsshim/internal/cow"
+	"github.com/Microsoft/hcsshim/internal/guestrequest"
+	"github.com/Microsoft/hcsshim/internal/hcs"
+	"github.com/Microsoft/hcsshim/internal/log"
+	"github.com/Microsoft/hcsshim/internal/winapi"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/windows"
+)
+
+// JobProcess represents a process run in a job object.
+type JobProcess struct {
+	cmd            *exec.Cmd
+	procLock       sync.Mutex
+	stdioLock      sync.Mutex
+	stdin          io.WriteCloser
+	stdout         io.ReadCloser
+	stderr         io.ReadCloser
+	waitBlock      chan struct{}
+	closedWaitOnce sync.Once
+	waitError      error
+}
+
+var sigMap = map[string]int{
+	"CtrlC":        windows.CTRL_BREAK_EVENT,
+	"CtrlBreak":    windows.CTRL_BREAK_EVENT,
+	"CtrlClose":    windows.CTRL_CLOSE_EVENT,
+	"CtrlLogOff":   windows.CTRL_LOGOFF_EVENT,
+	"CtrlShutdown": windows.CTRL_SHUTDOWN_EVENT,
+}
+
+var _ cow.Process = &JobProcess{}
+
+func newProcess(cmd *exec.Cmd) *JobProcess {
+	return &JobProcess{
+		cmd:       cmd,
+		waitBlock: make(chan struct{}),
+	}
+}
+
+func (p *JobProcess) ResizeConsole(ctx context.Context, width, height uint16) error {
+	return nil
+}
+
+// Stdio returns the stdio pipes of the process
+func (p *JobProcess) Stdio() (io.Writer, io.Reader, io.Reader) {
+	return p.stdin, p.stdout, p.stderr
+}
+
+// Signal sends a signal to the process and returns whether the signal was delivered.
+func (p *JobProcess) Signal(ctx context.Context, options interface{}) (bool, error) {
+	p.procLock.Lock()
+	defer p.procLock.Unlock()
+
+	if p.exited() {
+		return false, errors.New("signal not sent. process has already exited")
+	}
+
+	// If options is nil it's assumed we got a sigterm
+	if options == nil {
+		if err := p.cmd.Process.Kill(); err != nil {
+			return false, err
+		}
+		return true, nil
+	}
+
+	signalOptions, ok := options.(*guestrequest.SignalProcessOptionsWCOW)
+	if !ok {
+		return false, errors.New("unknown signal options")
+	}
+
+	signal, ok := sigMap[string(signalOptions.Signal)]
+	if !ok {
+		return false, fmt.Errorf("unknown signal %s encountered", signalOptions.Signal)
+	}
+
+	if err := signalProcess(uint32(p.cmd.Process.Pid), signal); err != nil {
+		return false, errors.Wrap(err, "failed to send signal")
+	}
+	return true, nil
+}
+
+// CloseStdin closes the stdin pipe of the process.
+func (p *JobProcess) CloseStdin(ctx context.Context) error {
+	p.stdioLock.Lock()
+	defer p.stdioLock.Unlock()
+	return p.stdin.Close()
+}
+
+// Wait waits for the process to exit. If the process has already exited returns
+// the previous error (if any).
+func (p *JobProcess) Wait() error {
+	<-p.waitBlock
+	return p.waitError
+}
+
+// Start starts the job object process
+func (p *JobProcess) Start() error {
+	return p.cmd.Start()
+}
+
+// This should only be called once.
+func (p *JobProcess) waitBackground(ctx context.Context) {
+	log.G(ctx).WithField("pid", p.Pid()).Debug("waitBackground for JobProcess")
+
+	// Wait for process to get signaled/exit/terminate/.
+	err := p.cmd.Wait()
+
+	// Wait closes the stdio pipes so theres no need to later on.
+	p.stdioLock.Lock()
+	p.stdin = nil
+	p.stdout = nil
+	p.stderr = nil
+	p.stdioLock.Unlock()
+
+	p.closedWaitOnce.Do(func() {
+		p.waitError = err
+		close(p.waitBlock)
+	})
+}
+
+// ExitCode returns the exit code of the process.
+func (p *JobProcess) ExitCode() (int, error) {
+	p.procLock.Lock()
+	defer p.procLock.Unlock()
+
+	if !p.exited() {
+		return -1, errors.New("process has not exited")
+	}
+	return p.cmd.ProcessState.ExitCode(), nil
+}
+
+// Pid returns the processes PID
+func (p *JobProcess) Pid() int {
+	if process := p.cmd.Process; process != nil {
+		return process.Pid
+	}
+	return 0
+}
+
+// Close cleans up any state associated with the process but does not kill it.
+func (p *JobProcess) Close() error {
+	p.stdioLock.Lock()
+	if p.stdin != nil {
+		p.stdin.Close()
+		p.stdin = nil
+	}
+	if p.stdout != nil {
+		p.stdout.Close()
+		p.stdout = nil
+	}
+	if p.stderr != nil {
+		p.stderr.Close()
+		p.stderr = nil
+	}
+	p.stdioLock.Unlock()
+
+	p.closedWaitOnce.Do(func() {
+		p.waitError = hcs.ErrAlreadyClosed
+		close(p.waitBlock)
+	})
+	return nil
+}
+
+// Kill signals the process to terminate.
+// Returns a bool signifying whether the signal was successfully delivered.
+func (p *JobProcess) Kill(ctx context.Context) (bool, error) {
+	log.G(ctx).WithField("pid", p.Pid()).Debug("killing job process")
+
+	p.procLock.Lock()
+	defer p.procLock.Unlock()
+
+	if p.exited() {
+		return false, errors.New("kill not sent. process already exited")
+	}
+
+	if p.cmd.Process != nil {
+		if err := p.cmd.Process.Kill(); err != nil {
+			return false, err
+		}
+	}
+	return true, nil
+}
+
+func (p *JobProcess) exited() bool {
+	if p.cmd.ProcessState == nil {
+		return false
+	}
+	return p.cmd.ProcessState.Exited()
+}
+
+// signalProcess sends the specified signal to a process.
+func signalProcess(pid uint32, signal int) error {
+	hProc, err := windows.OpenProcess(winapi.PROCESS_ALL_ACCESS, true, pid)
+	if err != nil {
+		return errors.Wrap(err, "failed to open process")
+	}
+	defer windows.Close(hProc)
+
+	// We can't use GenerateConsoleCtrlEvent since that only supports CTRL_C_EVENT and CTRL_BREAK_EVENT.
+	// Instead, to handle an arbitrary signal we open a CtrlRoutine thread inside the target process and
+	// give it the specified signal to handle. This is safe even with ASLR as even though kernel32.dll's
+	// location will be randomized each boot, it will be in the same address for every process. This is why
+	// we're able to get the address from a different process and use this as the start address for the routine
+	// that the thread will run.
+	//
+	// Note: This is a hack which is not officially supported.
+	k32, err := windows.LoadLibrary("kernel32.dll")
+	if err != nil {
+		return errors.Wrap(err, "failed to load kernel32 library")
+	}
+	defer windows.Close(k32)
+
+	proc, err := windows.GetProcAddress(k32, "CtrlRoutine")
+	if err != nil {
+		return errors.Wrap(err, "failed to load CtrlRoutine")
+	}
+
+	threadHandle, err := winapi.CreateRemoteThread(hProc, nil, 0, proc, uintptr(signal), 0, nil)
+	if err != nil {
+		return errors.Wrapf(err, "failed to open remote thread in target process %d", pid)
+	}
+	defer windows.Close(threadHandle)
+	return nil
+}
diff --git a/internal/jobcontainers/storage.go b/internal/jobcontainers/storage.go
new file mode 100644
index 0000000000..270d44def2
--- /dev/null
+++ b/internal/jobcontainers/storage.go
@@ -0,0 +1,93 @@
+package jobcontainers
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+
+	"github.com/Microsoft/hcsshim/internal/layers"
+	"github.com/Microsoft/hcsshim/internal/log"
+	"github.com/Microsoft/hcsshim/internal/wclayer"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/windows"
+)
+
+// Trailing backslash required for SetVolumeMountPoint and DeleteVolumeMountPoint
+const sandboxMountFormat = `C:\C\%s\`
+
+func mountLayers(ctx context.Context, s *specs.Spec) error {
+	if s == nil || s.Windows == nil || s.Windows.LayerFolders == nil {
+		return errors.New("field 'Spec.Windows.Layerfolders' is not populated")
+	}
+
+	// Last layer always contains the sandbox.vhdx, or 'scratch' space for the container.
+	scratchFolder := s.Windows.LayerFolders[len(s.Windows.LayerFolders)-1]
+	if _, err := os.Stat(scratchFolder); os.IsNotExist(err) {
+		if err := os.MkdirAll(scratchFolder, 0777); err != nil {
+			return errors.Wrapf(err, "failed to auto-create container scratch folder %s", scratchFolder)
+		}
+	}
+
+	// Create sandbox.vhdx if it doesn't exist in the scratch folder.
+	if _, err := os.Stat(filepath.Join(scratchFolder, "sandbox.vhdx")); os.IsNotExist(err) {
+		if err := wclayer.CreateScratchLayer(ctx, scratchFolder, s.Windows.LayerFolders[:len(s.Windows.LayerFolders)-1]); err != nil {
+			return errors.Wrap(err, "failed to CreateSandboxLayer")
+		}
+	}
+
+	if s.Root == nil {
+		s.Root = &specs.Root{}
+	}
+
+	if s.Root.Path == "" {
+		log.G(ctx).Debug("mounting job container storage")
+		containerRootPath, err := layers.MountContainerLayers(ctx, s.Windows.LayerFolders, "", nil)
+		if err != nil {
+			return errors.Wrap(err, "failed to mount container storage")
+		}
+		s.Root.Path = containerRootPath
+	}
+	return nil
+}
+
+// Mount the sandbox vhd to a user friendly path.
+func mountSandboxVolume(ctx context.Context, hostPath, volumeName string) (err error) {
+	log.G(ctx).WithFields(logrus.Fields{
+		"hostpath":   hostPath,
+		"volumeName": volumeName,
+	}).Debug("mounting sandbox volume for job container")
+
+	if _, err := os.Stat(hostPath); os.IsNotExist(err) {
+		if err := os.MkdirAll(hostPath, 0777); err != nil {
+			return err
+		}
+	}
+
+	defer func() {
+		if err != nil {
+			os.RemoveAll(hostPath)
+		}
+	}()
+
+	if err = windows.SetVolumeMountPoint(windows.StringToUTF16Ptr(hostPath), windows.StringToUTF16Ptr(volumeName)); err != nil {
+		return errors.Wrapf(err, "failed to mount sandbox volume to %s on host", hostPath)
+	}
+	return nil
+}
+
+// Remove volume mount point. And remove folder afterwards.
+func removeSandboxMountPoint(ctx context.Context, hostPath string) error {
+	log.G(ctx).WithFields(logrus.Fields{
+		"hostpath": hostPath,
+	}).Debug("mounting sandbox volume for job container")
+
+	if err := windows.DeleteVolumeMountPoint(windows.StringToUTF16Ptr(hostPath)); err != nil {
+		return errors.Wrap(err, "failed to delete sandbox volume mount point")
+	}
+	if err := os.Remove(hostPath); err != nil {
+		return errors.Wrapf(err, "failed to remove sandbox mounted folder path %q", hostPath)
+	}
+	return nil
+}
diff --git a/internal/jobobject/jobobject.go b/internal/jobobject/jobobject.go
index 98e4c51557..fe8992bc71 100644
--- a/internal/jobobject/jobobject.go
+++ b/internal/jobobject/jobobject.go
@@ -2,7 +2,6 @@ package jobobject
 
 import (
 	"context"
-	"fmt"
 	"sync"
 	"unsafe"
 
@@ -196,6 +195,13 @@ func Open(ctx context.Context, options *Options) (_ *JobObject, err error) {
 
 // helper function to setup notifications for creating/opening a job object
 func setupNotifications(ctx context.Context, job *JobObject) (*queue.MessageQueue, error) {
+	job.handleLock.RLock()
+	defer job.handleLock.RUnlock()
+
+	if job.handle == 0 {
+		return nil, ErrAlreadyClosed
+	}
+
 	ioInitOnce.Do(func() {
 		h, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0xffffffff)
 		if err != nil {
@@ -210,18 +216,11 @@ func setupNotifications(ctx context.Context, job *JobObject) (*queue.MessageQueu
 		return nil, initIOErr
 	}
 
-	job.handleLock.RLock()
-	defer job.handleLock.RUnlock()
-
-	if job.handle == 0 {
-		return nil, ErrAlreadyClosed
-	}
-
 	mq := queue.NewMessageQueue()
 	jobMap.Store(uintptr(job.handle), mq)
 	if err := attachIOCP(job.handle, ioCompletionPort); err != nil {
 		jobMap.Delete(uintptr(job.handle))
-		return nil, err
+		return nil, errors.Wrap(err, "failed to attach job to IO completion port")
 	}
 	return mq, nil
 }
@@ -326,7 +325,7 @@ func (job *JobObject) Pids() ([]uint32, error) {
 	}
 
 	if err != winapi.ERROR_MORE_DATA {
-		return nil, fmt.Errorf("failed initial query for PIDs in job object: %s", err)
+		return nil, errors.Wrap(err, "failed initial query for PIDs in job object")
 	}
 
 	jobBasicProcessIDListSize := unsafe.Sizeof(info) + (unsafe.Sizeof(info.ProcessIdList[0]) * uintptr(info.NumberOfAssignedProcesses-1))
@@ -338,7 +337,7 @@ func (job *JobObject) Pids() ([]uint32, error) {
 		uint32(len(buf)),
 		nil,
 	); err != nil {
-		return nil, fmt.Errorf("failed to query for PIDs in job object: %s", err)
+		return nil, errors.Wrap(err, "failed to query for PIDs in job object")
 	}
 
 	bufInfo := (*winapi.JOBOBJECT_BASIC_PROCESS_ID_LIST)(unsafe.Pointer(&buf[0]))
@@ -367,7 +366,7 @@ func (job *JobObject) QueryMemoryStats() (*winapi.JOBOBJECT_MEMORY_USAGE_INFORMA
 		uint32(unsafe.Sizeof(info)),
 		nil,
 	); err != nil {
-		return nil, fmt.Errorf("failed to query for job object memory stats: %s", err)
+		return nil, errors.Wrap(err, "failed to query for job object memory stats")
 	}
 	return &info, nil
 }
@@ -389,7 +388,7 @@ func (job *JobObject) QueryProcessorStats() (*winapi.JOBOBJECT_BASIC_ACCOUNTING_
 		uint32(unsafe.Sizeof(info)),
 		nil,
 	); err != nil {
-		return nil, fmt.Errorf("failed to query for job object process stats: %s", err)
+		return nil, errors.Wrap(err, "failed to query for job object process stats")
 	}
 	return &info, nil
 }
@@ -411,7 +410,7 @@ func (job *JobObject) QueryStorageStats() (*winapi.JOBOBJECT_BASIC_AND_IO_ACCOUN
 		uint32(unsafe.Sizeof(info)),
 		nil,
 	); err != nil {
-		return nil, fmt.Errorf("failed to query for job object storage stats: %s", err)
+		return nil, errors.Wrap(err, "failed to query for job object storage stats")
 	}
 	return &info, nil
 }
diff --git a/internal/jobobject/jobobject_test.go b/internal/jobobject/jobobject_test.go
index 8169d3d1bd..d251dab9a8 100644
--- a/internal/jobobject/jobobject_test.go
+++ b/internal/jobobject/jobobject_test.go
@@ -2,7 +2,13 @@ package jobobject
 
 import (
 	"context"
+	"os/exec"
+	"syscall"
 	"testing"
+	"time"
+
+	"github.com/pkg/errors"
+	"golang.org/x/sys/windows"
 )
 
 func TestJobNilOptions(t *testing.T) {
@@ -18,13 +24,240 @@ func TestJobCreateAndOpen(t *testing.T) {
 		options = &Options{Name: "test"}
 	)
 
-	_, err := Create(ctx, options)
+	jobCreate, err := Create(ctx, options)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer jobCreate.Close()
+
+	jobOpen, err := Open(ctx, options)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer jobOpen.Close()
+}
+
+func createProcsAndAssign(num int, job *JobObject) (_ []*exec.Cmd, err error) {
+	var procs []*exec.Cmd
+
+	defer func() {
+		if err != nil {
+			for _, proc := range procs {
+				proc.Process.Kill()
+			}
+		}
+	}()
+
+	for i := 0; i < num; i++ {
+		cmd := exec.Command("ping", "-t", "127.0.0.1")
+		cmd.SysProcAttr = &syscall.SysProcAttr{
+			CreationFlags: windows.CREATE_NEW_PROCESS_GROUP,
+		}
+
+		if err := cmd.Start(); err != nil {
+			return nil, err
+		}
+
+		if err := job.Assign(uint32(cmd.Process.Pid)); err != nil {
+			return nil, err
+		}
+		procs = append(procs, cmd)
+	}
+	return procs, nil
+}
+
+func TestSetTerminateOnLastHandleClose(t *testing.T) {
+	options := &Options{
+		Name:          "test",
+		Notifications: true,
+	}
+	job, err := Create(context.Background(), options)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer job.Close()
+
+	if err := job.SetTerminateOnLastHandleClose(); err != nil {
+		t.Fatal(err)
+	}
+
+	procs, err := createProcsAndAssign(1, job)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	errCh := make(chan error)
+	go func() {
+		if err := job.Close(); err != nil {
+			errCh <- err
+		}
+		if err := procs[0].Wait(); err != nil {
+			errCh <- err
+		}
+		// Check if process is still alive after job handle close (it should not be).
+		// If wait returned it should be gone but just to be explicit check anyways.
+		if !procs[0].ProcessState.Exited() {
+			errCh <- errors.New("process should have exited after closing job handle")
+		}
+		errCh <- nil
+	}()
+
+	select {
+	case err := <-errCh:
+		if err != nil {
+			t.Fatal(err)
+		}
+	case <-time.After(time.Second * 10):
+		procs[0].Process.Kill()
+		t.Fatal("process didn't complete wait within timeout")
+	}
+}
+
+func TestSetMultipleExtendedLimits(t *testing.T) {
+	// Tests setting two different properties on the job that modify
+	// JOBOBJECT_EXTENDED_LIMIT_INFORMATION
+	options := &Options{
+		Name:          "test",
+		Notifications: true,
+	}
+	job, err := Create(context.Background(), options)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer job.Close()
+
+	// No reason for this limit in particular. Could be any value.
+	memLimitInMB := uint64(10 * 1024 * 1204)
+	if err := job.SetMemoryLimit(memLimitInMB); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := job.SetTerminateOnLastHandleClose(); err != nil {
+		t.Fatal(err)
+	}
+
+	eli, err := job.getExtendedInformation()
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	_, err = Open(ctx, options)
+	if !isFlagSet(windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, eli.BasicLimitInformation.LimitFlags) {
+		t.Fatal("the job does not have cpu rate control enabled")
+	}
+
+	if !isFlagSet(windows.JOB_OBJECT_LIMIT_JOB_MEMORY, eli.BasicLimitInformation.LimitFlags) {
+		t.Fatal("the job does not have cpu rate control enabled")
+	}
+
+	if eli.JobMemoryLimit != uintptr(memLimitInMB) {
+		t.Fatal("job memory limit not persisted")
+	}
+}
+
+func TestNoMoreProcessesMessageKill(t *testing.T) {
+	// Test that we receive the no more processes in job message after killing all of
+	// the processes in the job.
+	options := &Options{
+		Name:          "test",
+		Notifications: true,
+	}
+	job, err := Create(context.Background(), options)
 	if err != nil {
 		t.Fatal(err)
 	}
+	defer job.Close()
+
+	if err := job.SetTerminateOnLastHandleClose(); err != nil {
+		t.Fatal(err)
+	}
+
+	procs, err := createProcsAndAssign(2, job)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	errCh := make(chan error)
+	go func() {
+		for _, proc := range procs {
+			if err := proc.Process.Kill(); err != nil {
+				errCh <- err
+			}
+		}
+
+		for {
+			notif, err := job.PollNotification()
+			if err != nil {
+				errCh <- err
+			}
+
+			switch notif.(type) {
+			case MsgAllProcessesExited:
+				errCh <- nil
+			case MsgUnimplemented:
+			default:
+			}
+		}
+	}()
+
+	select {
+	case err := <-errCh:
+		if err != nil {
+			t.Fatal(err)
+		}
+	case <-time.After(time.Second * 10):
+		t.Fatal("didn't receive no more processes message within timeout")
+	}
+}
+
+func TestNoMoreProcessesMessageTerminate(t *testing.T) {
+	// Test that we receive the no more processes in job message after terminating the
+	// job (terminates every process in the job).
+	options := &Options{
+		Name:          "test",
+		Notifications: true,
+	}
+	job, err := Create(context.Background(), options)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer job.Close()
+
+	if err := job.SetTerminateOnLastHandleClose(); err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = createProcsAndAssign(2, job)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	errCh := make(chan error)
+	go func() {
+		if err := job.Terminate(1); err != nil {
+			errCh <- err
+		}
+
+		for {
+			notif, err := job.PollNotification()
+			if err != nil {
+				errCh <- err
+			}
+
+			switch notif.(type) {
+			case MsgAllProcessesExited:
+				errCh <- nil
+			case MsgUnimplemented:
+			default:
+			}
+		}
+	}()
+
+	select {
+	case err := <-errCh:
+		if err != nil {
+			t.Fatal(err)
+		}
+	case <-time.After(time.Second * 10):
+		t.Fatal("didn't receive no more processes message within timeout")
+	}
 }
diff --git a/internal/jobobject/limits.go b/internal/jobobject/limits.go
index 990af100cf..4657f9322d 100644
--- a/internal/jobobject/limits.go
+++ b/internal/jobobject/limits.go
@@ -14,7 +14,7 @@ const (
 	memoryLimitMax     uint64  = 0xffffffffffffffff
 )
 
-func isFlagSet(flag uint32, controlFlags uint32) bool {
+func isFlagSet(flag, controlFlags uint32) bool {
 	return (flag & controlFlags) == flag
 }
 
@@ -45,6 +45,17 @@ func (job *JobObject) SetResourceLimits(limits *JobLimits) error {
 	return nil
 }
 
+// SetTerminateOnLastHandleClose sets the job object flag that specifies that the job should terminate
+// all processes in the job on the last open handle being closed.
+func (job *JobObject) SetTerminateOnLastHandleClose() error {
+	info, err := job.getExtendedInformation()
+	if err != nil {
+		return err
+	}
+	info.BasicLimitInformation.LimitFlags |= windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE
+	return job.setExtendedInformation(info)
+}
+
 // SetMemoryLimit sets the memory limit of the job object based on the given `memoryLimitInBytes`.
 func (job *JobObject) SetMemoryLimit(memoryLimitInBytes uint64) error {
 	if memoryLimitInBytes >= memoryLimitMax {
diff --git a/internal/layers/layers.go b/internal/layers/layers.go
index 5f815b783a..3fffd827be 100644
--- a/internal/layers/layers.go
+++ b/internal/layers/layers.go
@@ -74,7 +74,7 @@ func MountContainerLayers(ctx context.Context, layerFolders []string, guestRoot
 
 	if uvm == nil {
 		if len(layerFolders) < 2 {
-			return "", fmt.Errorf("need at least two layers - base and scratch")
+			return "", errors.New("need at least two layers - base and scratch")
 		}
 		path := layerFolders[len(layerFolders)-1]
 		rest := layerFolders[:len(layerFolders)-1]
diff --git a/internal/oci/uvm.go b/internal/oci/uvm.go
index 5c2cb3c3b5..9b8c6933aa 100644
--- a/internal/oci/uvm.go
+++ b/internal/oci/uvm.go
@@ -87,6 +87,11 @@ const (
 	// files and information needed to install given driver(s). This may include .sys,
 	// .inf, .cer, and/or other files used during standard installation with pnputil.
 	AnnotationAssignedDeviceKernelDrivers = "io.microsoft.assigneddevice.kerneldrivers"
+	// AnnotationHostProcessInheritUser indicates whether to ignore the username passed in to run a host process
+	// container as and instead inherit the user token from the executable that is launching the container process.
+	AnnotationHostProcessInheritUser = "microsoft.com/hostprocess-inherit-user"
+	// AnnotationHostProcessContainer indicates to launch a host process container (job container in this repository).
+	AnnotationHostProcessContainer = "microsoft.com/hostprocess-container"
 
 	annotationAllowOvercommit       = "io.microsoft.virtualmachine.computetopology.memory.allowovercommit"
 	annotationEnableDeferredCommit  = "io.microsoft.virtualmachine.computetopology.memory.enabledeferredcommit"
diff --git a/internal/winapi/memory.go b/internal/winapi/memory.go
index ccaf5a624f..83f7040644 100644
--- a/internal/winapi/memory.go
+++ b/internal/winapi/memory.go
@@ -9,3 +9,19 @@ package winapi
 
 //sys LocalAlloc(flags uint32, size int) (ptr uintptr) = kernel32.LocalAlloc
 //sys LocalFree(ptr uintptr) = kernel32.LocalFree
+
+// BOOL QueryWorkingSet(
+//	HANDLE hProcess,
+//	PVOID  pv,
+//	DWORD  cb
+// );
+//sys QueryWorkingSet(handle windows.Handle, pv uintptr, cb uint32) (err error) = psapi.QueryWorkingSet
+
+type PSAPI_WORKING_SET_INFORMATION struct {
+	NumberOfEntries uintptr
+	WorkingSetInfo  [1]PSAPI_WORKING_SET_BLOCK
+}
+
+type PSAPI_WORKING_SET_BLOCK struct {
+	Flags uintptr
+}
diff --git a/internal/winapi/path.go b/internal/winapi/path.go
index 0ae8f33ea6..908920e872 100644
--- a/internal/winapi/path.go
+++ b/internal/winapi/path.go
@@ -8,4 +8,4 @@ package winapi
 // 	LPWSTR  lpBuffer,
 // 	LPWSTR  *lpFilePart
 // );
-//sys SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath **uint16) (size uint32, err error) = kernel32.SearchPathW
+//sys SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath *uint16) (size uint32, err error) = kernel32.SearchPathW
diff --git a/internal/winapi/process.go b/internal/winapi/process.go
index adf0168eae..b87068327c 100644
--- a/internal/winapi/process.go
+++ b/internal/winapi/process.go
@@ -1,3 +1,10 @@
 package winapi
 
 const PROCESS_ALL_ACCESS uint32 = 2097151
+
+// DWORD GetProcessImageFileNameW(
+//	HANDLE hProcess,
+//	LPWSTR lpImageFileName,
+//	DWORD  nSize
+// );
+//sys GetProcessImageFileName(hProcess windows.Handle, imageFileName *uint16, nSize uint32) (size uint32, err error) = kernel32.GetProcessImageFileNameW
diff --git a/internal/winapi/thread.go b/internal/winapi/thread.go
new file mode 100644
index 0000000000..2a1dac26ac
--- /dev/null
+++ b/internal/winapi/thread.go
@@ -0,0 +1,3 @@
+package winapi
+
+//sys CreateRemoteThread(process windows.Handle, sa *windows.SecurityAttributes, stackSize uint32, startAddr uintptr, parameter uintptr, creationFlags uint32, threadID *uint32) (handle windows.Handle, err error) = kernel32.CreateRemoteThread
diff --git a/internal/winapi/winapi.go b/internal/winapi/winapi.go
index 77ea13e3e3..e783d61f66 100644
--- a/internal/winapi/winapi.go
+++ b/internal/winapi/winapi.go
@@ -2,4 +2,4 @@
 // be thought of as an extension to golang.org/x/sys/windows.
 package winapi
 
-//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go net.go iocp.go jobobject.go path.go logon.go memory.go processor.go devices.go filesystem.go errors.go
+//go:generate go run ..\..\mksyscall_windows.go -output zsyscall_windows.go net.go path.go thread.go iocp.go jobobject.go logon.go memory.go process.go processor.go devices.go filesystem.go errors.go
diff --git a/internal/winapi/zsyscall_windows.go b/internal/winapi/zsyscall_windows.go
index 3a54c1fa1b..8dd3d3e9bd 100644
--- a/internal/winapi/zsyscall_windows.go
+++ b/internal/winapi/zsyscall_windows.go
@@ -41,9 +41,12 @@ var (
 	modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
 	modntdll    = windows.NewLazySystemDLL("ntdll.dll")
 	modadvapi32 = windows.NewLazySystemDLL("advapi32.dll")
+	modpsapi    = windows.NewLazySystemDLL("psapi.dll")
 	modcfgmgr32 = windows.NewLazySystemDLL("cfgmgr32.dll")
 
 	procSetJobCompartmentId                    = modiphlpapi.NewProc("SetJobCompartmentId")
+	procSearchPathW                            = modkernel32.NewProc("SearchPathW")
+	procCreateRemoteThread                     = modkernel32.NewProc("CreateRemoteThread")
 	procGetQueuedCompletionStatus              = modkernel32.NewProc("GetQueuedCompletionStatus")
 	procIsProcessInJob                         = modkernel32.NewProc("IsProcessInJob")
 	procQueryInformationJobObject              = modkernel32.NewProc("QueryInformationJobObject")
@@ -52,11 +55,12 @@ var (
 	procQueryIoRateControlInformationJobObject = modkernel32.NewProc("QueryIoRateControlInformationJobObject")
 	procNtOpenJobObject                        = modntdll.NewProc("NtOpenJobObject")
 	procNtCreateJobObject                      = modntdll.NewProc("NtCreateJobObject")
-	procSearchPathW                            = modkernel32.NewProc("SearchPathW")
 	procLogonUserW                             = modadvapi32.NewProc("LogonUserW")
 	procRtlMoveMemory                          = modkernel32.NewProc("RtlMoveMemory")
 	procLocalAlloc                             = modkernel32.NewProc("LocalAlloc")
 	procLocalFree                              = modkernel32.NewProc("LocalFree")
+	procQueryWorkingSet                        = modpsapi.NewProc("QueryWorkingSet")
+	procGetProcessImageFileNameW               = modkernel32.NewProc("GetProcessImageFileNameW")
 	procGetActiveProcessorCount                = modkernel32.NewProc("GetActiveProcessorCount")
 	procCM_Get_Device_ID_List_SizeA            = modcfgmgr32.NewProc("CM_Get_Device_ID_List_SizeA")
 	procCM_Get_Device_ID_ListA                 = modcfgmgr32.NewProc("CM_Get_Device_ID_ListA")
@@ -77,6 +81,32 @@ func SetJobCompartmentId(handle windows.Handle, compartmentId uint32) (win32Err
 	return
 }
 
+func SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath *uint16) (size uint32, err error) {
+	r0, _, e1 := syscall.Syscall6(procSearchPathW.Addr(), 6, uintptr(unsafe.Pointer(lpPath)), uintptr(unsafe.Pointer(lpFileName)), uintptr(unsafe.Pointer(lpExtension)), uintptr(nBufferLength), uintptr(unsafe.Pointer(lpBuffer)), uintptr(unsafe.Pointer(lpFilePath)))
+	size = uint32(r0)
+	if size == 0 {
+		if e1 != 0 {
+			err = errnoErr(e1)
+		} else {
+			err = syscall.EINVAL
+		}
+	}
+	return
+}
+
+func CreateRemoteThread(process windows.Handle, sa *windows.SecurityAttributes, stackSize uint32, startAddr uintptr, parameter uintptr, creationFlags uint32, threadID *uint32) (handle windows.Handle, err error) {
+	r0, _, e1 := syscall.Syscall9(procCreateRemoteThread.Addr(), 7, uintptr(process), uintptr(unsafe.Pointer(sa)), uintptr(stackSize), uintptr(startAddr), uintptr(parameter), uintptr(creationFlags), uintptr(unsafe.Pointer(threadID)), 0, 0)
+	handle = windows.Handle(r0)
+	if handle == 0 {
+		if e1 != 0 {
+			err = errnoErr(e1)
+		} else {
+			err = syscall.EINVAL
+		}
+	}
+	return
+}
+
 func GetQueuedCompletionStatus(cphandle windows.Handle, qty *uint32, key *uintptr, overlapped **windows.Overlapped, timeout uint32) (err error) {
 	r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(cphandle), uintptr(unsafe.Pointer(qty)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(overlapped)), uintptr(timeout), 0)
 	if r1 == 0 {
@@ -170,19 +200,6 @@ func NtCreateJobObject(jobHandle *windows.Handle, desiredAccess uint32, objAttri
 	return
 }
 
-func SearchPath(lpPath *uint16, lpFileName *uint16, lpExtension *uint16, nBufferLength uint32, lpBuffer *uint16, lpFilePath **uint16) (size uint32, err error) {
-	r0, _, e1 := syscall.Syscall6(procSearchPathW.Addr(), 6, uintptr(unsafe.Pointer(lpPath)), uintptr(unsafe.Pointer(lpFileName)), uintptr(unsafe.Pointer(lpExtension)), uintptr(nBufferLength), uintptr(unsafe.Pointer(lpBuffer)), uintptr(unsafe.Pointer(lpFilePath)))
-	size = uint32(r0)
-	if size == 0 {
-		if e1 != 0 {
-			err = errnoErr(e1)
-		} else {
-			err = syscall.EINVAL
-		}
-	}
-	return
-}
-
 func LogonUser(username *uint16, domain *uint16, password *uint16, logonType uint32, logonProvider uint32, token *windows.Token) (err error) {
 	r1, _, e1 := syscall.Syscall6(procLogonUserW.Addr(), 6, uintptr(unsafe.Pointer(username)), uintptr(unsafe.Pointer(domain)), uintptr(unsafe.Pointer(password)), uintptr(logonType), uintptr(logonProvider), uintptr(unsafe.Pointer(token)))
 	if r1 == 0 {
@@ -218,6 +235,31 @@ func LocalFree(ptr uintptr) {
 	return
 }
 
+func QueryWorkingSet(handle windows.Handle, pv uintptr, cb uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procQueryWorkingSet.Addr(), 3, uintptr(handle), uintptr(pv), uintptr(cb))
+	if r1 == 0 {
+		if e1 != 0 {
+			err = errnoErr(e1)
+		} else {
+			err = syscall.EINVAL
+		}
+	}
+	return
+}
+
+func GetProcessImageFileName(hProcess windows.Handle, imageFileName *uint16, nSize uint32) (size uint32, err error) {
+	r0, _, e1 := syscall.Syscall(procGetProcessImageFileNameW.Addr(), 3, uintptr(hProcess), uintptr(unsafe.Pointer(imageFileName)), uintptr(nSize))
+	size = uint32(r0)
+	if size == 0 {
+		if e1 != 0 {
+			err = errnoErr(e1)
+		} else {
+			err = syscall.EINVAL
+		}
+	}
+	return
+}
+
 func GetActiveProcessorCount(groupNumber uint16) (amount uint32) {
 	r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0)
 	amount = uint32(r0)
diff --git a/test/cri-containerd/jobcontainer_test.go b/test/cri-containerd/jobcontainer_test.go
new file mode 100644
index 0000000000..59e6dcc3c0
--- /dev/null
+++ b/test/cri-containerd/jobcontainer_test.go
@@ -0,0 +1,87 @@
+// +build functional
+
+package cri_containerd
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func getJobContainerPodRequestWCOW(t *testing.T) *runtime.RunPodSandboxRequest {
+	return &runtime.RunPodSandboxRequest{
+		Config: &runtime.PodSandboxConfig{
+			Metadata: &runtime.PodSandboxMetadata{
+				Name:      t.Name(),
+				Namespace: testNamespace,
+			},
+			Annotations: map[string]string{
+				"microsoft.com/hostprocess-container": "true",
+			},
+		},
+		RuntimeHandler: wcowProcessRuntimeHandler,
+	}
+}
+
+func getJobContainerRequestWCOW(t *testing.T, podID string, podConfig *runtime.PodSandboxConfig) *runtime.CreateContainerRequest {
+	return &runtime.CreateContainerRequest{
+		Config: &runtime.ContainerConfig{
+			Metadata: &runtime.ContainerMetadata{
+				Name: t.Name() + "-Container",
+			},
+			Image: &runtime.ImageSpec{
+				Image: imageWindowsNanoserver,
+			},
+			Command: []string{
+				"cmd",
+				"/c",
+				"ping",
+				"-t",
+				"127.0.0.1",
+			},
+
+			Annotations: map[string]string{
+				"microsoft.com/hostprocess":              "true",
+				"microsoft.com/hostprocess-inherit-user": "true",
+			},
+		},
+		PodSandboxId:  podID,
+		SandboxConfig: podConfig,
+	}
+}
+
+func Test_RunContainer_InheritUser_JobContainer_WCOW(t *testing.T) {
+	requireFeatures(t, featureWCOWProcess, featureHostProcess)
+
+	pullRequiredImages(t, []string{imageWindowsNanoserver})
+	client := newTestRuntimeClient(t)
+
+	username := "nt authority\\system"
+	podctx := context.Background()
+	sandboxRequest := getJobContainerPodRequestWCOW(t)
+
+	podID := runPodSandbox(t, client, podctx, sandboxRequest)
+	defer removePodSandbox(t, client, podctx, podID)
+	defer stopPodSandbox(t, client, podctx, podID)
+
+	containerRequest := getJobContainerRequestWCOW(t, podID, sandboxRequest.Config)
+	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
+	defer cancel()
+
+	containerID := createContainer(t, client, ctx, containerRequest)
+	defer removeContainer(t, client, ctx, containerID)
+	startContainer(t, client, ctx, containerID)
+	defer stopContainer(t, client, ctx, containerID)
+
+	execResponse := execSync(t, client, ctx, &runtime.ExecSyncRequest{
+		ContainerId: containerID,
+		Cmd:         []string{"whoami"},
+	})
+	stdout := strings.Trim(string(execResponse.Stdout), " \r\n")
+	if !strings.Contains(stdout, username) {
+		t.Fatalf("expected user: '%s', got '%s'", username, stdout)
+	}
+}
diff --git a/test/cri-containerd/main.go b/test/cri-containerd/main.go
index 08130aca83..6eab0e3153 100644
--- a/test/cri-containerd/main.go
+++ b/test/cri-containerd/main.go
@@ -75,6 +75,7 @@ const (
 	featureLCOW           = "LCOW"
 	featureWCOWProcess    = "WCOWProcess"
 	featureWCOWHypervisor = "WCOWHypervisor"
+	featureHostProcess    = "HostProcess"
 	featureGMSA           = "GMSA"
 	featureGPU            = "GPU"
 )
@@ -83,6 +84,7 @@ var allFeatures = []string{
 	featureLCOW,
 	featureWCOWProcess,
 	featureWCOWHypervisor,
+	featureHostProcess,
 	featureGMSA,
 	featureGPU,
 }