From bc14fe0480cbdd40fabaef1b70a80e0558886676 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Mon, 21 Nov 2016 23:21:32 +0800 Subject: [PATCH] specs-go/config: add Intel RDT/CAT Linux support Add support for Intel Resource Director Technology (RDT) / Cache Allocation Technology (CAT). Add L3 cache resource constraints in Linux-specific configuration. This is the prerequisite of this runc proposal: https://github.com/opencontainers/runc/issues/433 For more information about Intel RDT/CAT, please refer to: https://github.com/opencontainers/runc/issues/433 Signed-off-by: Xiaochen Shen --- config-linux.md | 89 ++++++++++++++++++++++++++++++++++++++++++++++ specs-go/config.go | 9 +++++ 2 files changed, 98 insertions(+) diff --git a/config-linux.md b/config-linux.md index e3f79bdb8..91ca36e62 100644 --- a/config-linux.md +++ b/config-linux.md @@ -476,6 +476,94 @@ The following parameters can be specified to setup the controller: } ``` +## Intel RDT + +Intel platforms with newer Xeon CPUs support Intel Resource Director Technology +(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which +currently supports L3 cache resource allocation. + +This feature provides a way for the software to restrict cache allocation to a +defined 'subset' of L3 cache which may overlap with other 'subsets'. +The different subsets are identified by class of service (CLOS) and each CLOS +has a capacity bitmask (CBM). + +In the Linux kernel, it is exposed via the "resource control" filesystem, which is a +"cgroup-like" interface. + +Compared with cgroups, it has a similar process management lifecycle and +interfaces in a container. But unlike cgroups' hierarchy, it has a single-level +filesystem layout. 
+ +Intel RDT "resource control" filesystem hierarchy: +``` +mount -t resctrl resctrl /sys/fs/resctrl +tree /sys/fs/resctrl +/sys/fs/resctrl/ +|-- info +| |-- L3 +| |-- cbm_mask +| |-- min_cbm_bits +| |-- num_closids +|-- cpus +|-- schemata +|-- tasks +|-- <container_id> + |-- cpus + |-- schemata + |-- tasks + +``` + +For containers, we can make use of the `tasks` and `schemata` files to configure +L3 cache resource constraints if the hardware and kernel support Intel RDT/CAT. + +The file `tasks` has a list of tasks that belong to this group (e.g., the +"<container_id>" group). Tasks can be added to a group by writing the task ID +to the "tasks" file (which will automatically remove them from the previous +group to which they belonged). New tasks created by fork(2) and clone(2) are +added to the same group as their parent. If a task is not in any sub-group, it +is in the root group. + +The file `schemata` has allocation masks/values for L3 cache on each socket, +which contains L3 cache id and capacity bitmask (CBM). +``` + Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." +``` +For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`, +which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + +A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that can +be set must not exceed the maximum CBM length. The maximum CBM length varies among +supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem +layout, the CBM in a group should be a subset of the CBM in root. The kernel will +check if it is valid when writing. For example, 0xfffff in root indicates the max CBM length +is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM +values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. + +**`intelRdt`** (object, OPTIONAL) represents the L3 cache resource constraints on Intel Xeon platforms. +It is part of the `resources` field of the Linux configuration. + +For more information, see [Intel RDT/CAT kernel interface][intel-rdt-cat-kernel-interface]. 
+ +The following parameters can be specified for the container: + +* **`l3CacheSchema`** *(string, OPTIONAL)* - specifies the schema for L3 cache id and capacity bitmask (CBM) + +###### Example + +Consider a two-socket machine with two L3 caches, where the default CBM is 0xfffff +and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache +id 0 and the whole L3 cache id 1 to the container: +```json +"linux": { + "resources": { + "intelRdt": { + "l3CacheSchema": "L3:0=ffff0;1=fffff" + } + } +} +``` + ## Sysctl **`sysctl`** (object, OPTIONAL) allows kernel parameters to be modified at runtime for the container. @@ -621,3 +709,4 @@ The values MUST be absolute paths in the [container namespace][container-namespa [random.4]: http://man7.org/linux/man-pages/man4/random.4.html [tty.4]: http://man7.org/linux/man-pages/man4/tty.4.html [zero.4]: http://man7.org/linux/man-pages/man4/zero.4.html +[intel-rdt-cat-kernel-interface]: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/x86/intel_rdt_ui.txt diff --git a/specs-go/config.go b/specs-go/config.go index 1660b776f..0396fae12 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -311,6 +311,13 @@ type LinuxNetwork struct { Priorities []LinuxInterfacePriority `json:"priorities,omitempty"` } +// LinuxIntelRdt holds the Linux Intel RDT/CAT resource management configuration (Linux 4.10) +type LinuxIntelRdt struct { + // The schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." + L3CacheSchema *string `json:"l3CacheSchema,omitempty"` +} + // LinuxResources has container runtime resource constraints type LinuxResources struct { // Devices configures the device whitelist. 
@@ -331,6 +338,8 @@ type LinuxResources struct { HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"` // Network restriction configuration Network *LinuxNetwork `json:"network,omitempty"` + // IntelRdt restriction configuration + IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` } // LinuxDevice represents the mknod information for a Linux special device file