From 3992e9002564ef5d63ceec89c92346577bcef275 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 11 Sep 2018 18:09:42 +0800 Subject: [PATCH] libcontainer: intelrdt: add support for Intel RDT/MBA in runc Memory Bandwidth Allocation (MBA) is a resource allocation sub-feature of Intel Resource Director Technology (RDT) which is supported on some Intel Xeon platforms. Intel RDT/MBA provides indirect and approximate throttle over memory bandwidth for the software. A user controls the resource by indicating the percentage of maximum memory bandwidth. Hardware details of Intel RDT/MBA can be found in section 17.18 of Intel Software Developer Manual: https://software.intel.com/en-us/articles/intel-sdm In Linux 4.12 kernel and newer, Intel RDT/MBA is enabled by kernel config CONFIG_INTEL_RDT. If hardware support, CPU flags `rdt_a` and `mba` will be set in /proc/cpuinfo. Intel RDT "resource control" filesystem hierarchy: mount -t resctrl resctrl /sys/fs/resctrl tree /sys/fs/resctrl /sys/fs/resctrl/ |-- info | |-- L3 | | |-- cbm_mask | | |-- min_cbm_bits | | |-- num_closids | |-- MB | |-- bandwidth_gran | |-- delay_linear | |-- min_bandwidth | |-- num_closids |-- ... |-- schemata |-- tasks |-- |-- ... |-- schemata |-- tasks For MBA support for `runc`, we will reuse the infrastructure and code base of Intel RDT/CAT which implemented in #1279. We could also make use of `tasks` and `schemata` configuration for memory bandwidth resource constraints. The file `tasks` has a list of tasks that belongs to this group (e.g., " group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. The file `schemata` has a list of all the resources available to this group. Each resource (L3 cache, memory bandwidth) has its own line and format. Memory bandwidth schema: It has allocation values for memory bandwidth on each socket, which contains L3 cache id and memory bandwidth percentage. Format: "MB:=bandwidth0;=bandwidth1;..." The minimum bandwidth percentage value for each CPU model is predefined and can be looked up through "info/MB/min_bandwidth". The bandwidth granularity that is allocated is also dependent on the CPU model and can be looked up at "info/MB/bandwidth_gran". The available bandwidth control steps are: min_bw + N * bw_gran. Intermediate values are rounded to the next control step available on the hardware. For more information about Intel RDT kernel interface: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt An example for runc: Consider a two-socket machine with two L3 caches where the minimum memory bandwidth of 10% with a memory bandwidth granularity of 10%. Tasks inside the container may use a maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. "linux": { "intelRdt": { "memBwSchema": "MB:0=20;1=70" } } Signed-off-by: Xiaochen Shen --- events.go | 38 ++- libcontainer/configs/config.go | 4 +- libcontainer/configs/intelrdt.go | 4 + libcontainer/configs/validate/validator.go | 19 +- libcontainer/factory_linux.go | 6 +- libcontainer/intelrdt/intelrdt.go | 350 ++++++++++++++------- libcontainer/intelrdt/stats.go | 16 + libcontainer/specconv/spec_linux.go | 3 + update.go | 2 +- utils_linux.go | 2 +- 10 files changed, 319 insertions(+), 125 deletions(-) diff --git a/events.go b/events.go index 94dd2b93dc2..cfe369e1ecd 100644 --- a/events.go +++ b/events.go @@ -104,6 +104,13 @@ type l3CacheInfo struct { NumClosids uint64 `json:"num_closids,omitempty"` } +type memBwInfo struct { + BandwidthGran uint64 `json:"bandwidth_gran,omitempty"` + DelayLinear uint64 `json:"delay_linear,omitempty"` + MinBandwidth uint64 `json:"min_bandwidth,omitempty"` + NumClosids uint64 `json:"num_closids,omitempty"` +} + type intelRdt struct { // The read-only L3 cache information L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"` @@ -113,6 +120,15 @@ type intelRdt struct { // The L3 cache schema in 'container_id' group L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The read-only memory bandwidth information + MemBwInfo *memBwInfo `json:"mem_bw_info,omitempty"` + + // The read-only memory bandwidth schema in root + MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"` + + // The memory bandwidth schema in 'container_id' group + MemBwSchema string `json:"mem_bw_schema,omitempty"` } var eventsCommand = cli.Command{ @@ -248,9 +264,16 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats { } if is := ls.IntelRdtStats; is != nil { - s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo) - s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot - s.IntelRdt.L3CacheSchema = is.L3CacheSchema + if intelrdt.IsCatEnabled() { + s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo) + s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot + s.IntelRdt.L3CacheSchema = is.L3CacheSchema + } + if intelrdt.IsMbaEnabled() { + s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo) + s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot + s.IntelRdt.MemBwSchema = is.MemBwSchema + } } return &s @@ -293,3 +316,12 @@ func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo { NumClosids: i.NumClosids, } } + +func convertMemBwInfo(i *intelrdt.MemBwInfo) *memBwInfo { + return &memBwInfo{ + BandwidthGran: i.BandwidthGran, + DelayLinear: i.DelayLinear, + MinBandwidth: i.MinBandwidth, + NumClosids: i.NumClosids, + } +} diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index b1c4762fe20..064762d5590 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -189,8 +189,8 @@ type Config struct { // Rootless specifies whether the container is a rootless container. Rootless bool `json:"rootless"` - // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into - // to limit the resources (e.g., L3 cache) the container has available + // IntelRdt specifies settings for Intel RDT group that the container is placed into + // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` } diff --git a/libcontainer/configs/intelrdt.go b/libcontainer/configs/intelrdt.go index 36bd5f96a11..6f47aac077d 100644 --- a/libcontainer/configs/intelrdt.go +++ b/libcontainer/configs/intelrdt.go @@ -4,4 +4,8 @@ type IntelRdt struct { // The schema for L3 cache id and capacity bitmask (CBM) // Format: "L3:=;=;..." L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The schema of memory bandwidth percentage per L3 cache id + // Format: "MB:=bandwidth0;=bandwidth1;..." + MemBwSchema string `json:"memBwSchema,omitempty"` } diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index b36e553d207..417f7827a49 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -169,11 +169,22 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error { func (v *ConfigValidator) intelrdt(config *configs.Config) error { if config.IntelRdt != nil { - if !intelrdt.IsEnabled() { - return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled") + if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() { + return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled") } - if config.IntelRdt.L3CacheSchema == "" { - return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty") + + if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" { + return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled") + } + if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" { + return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled") + } + + if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" { + return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty") + } + if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" { + return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty") } } diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 612ccd74b98..11883cb3a65 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -92,7 +92,7 @@ func RootlessCgroupfs(l *LinuxFactory) error { // IntelRdtfs is an options func to configure a LinuxFactory to return // containers that use the Intel RDT "resource control" filesystem to -// create and manage Intel Xeon platform shared resources (e.g., L3 cache). +// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth). func IntelRdtFs(l *LinuxFactory) error { l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager { return &intelrdt.IntelRdtManager{ @@ -222,7 +222,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err newgidmapPath: l.NewgidmapPath, cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), } - if intelrdt.IsEnabled() { + if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() { c.intelRdtManager = l.NewIntelRdtManager(config, id, "") } c.state = &stoppedState{c: c} @@ -268,7 +268,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) { if err := c.refreshState(); err != nil { return nil, err } - if intelrdt.IsEnabled() { + if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() { c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath) } return c, nil diff --git a/libcontainer/intelrdt/intelrdt.go b/libcontainer/intelrdt/intelrdt.go index 487c630af61..159ead6ae9b 100644 --- a/libcontainer/intelrdt/intelrdt.go +++ b/libcontainer/intelrdt/intelrdt.go @@ -16,20 +16,25 @@ import ( ) /* - * About Intel RDT/CAT feature: + * About Intel RDT features: * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). - * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3 - * Cache is the only resource that is supported in RDT. + * Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are + * two sub-features of RDT. * - * This feature provides a way for the software to restrict cache allocation to a - * defined 'subset' of L3 cache which may be overlapping with other 'subsets'. - * The different subsets are identified by class of service (CLOS) and each CLOS - * has a capacity bitmask (CBM). + * Cache Allocation Technology (CAT) provides a way for the software to restrict + * cache allocation to a defined 'subset' of L3 cache which may be overlapping + * with other 'subsets'. The different subsets are identified by class of + * service (CLOS) and each CLOS has a capacity bitmask (CBM). * - * For more information about Intel RDT/CAT can be found in the section 17.17 - * of Intel Software Developer Manual. + * Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle + * over memory bandwidth for the software. A user controls the resource by + * indicating the percentage of maximum memory bandwidth. * - * About Intel RDT/CAT kernel interface: + * More details about Intel RDT CAT and MBA can be found in the section 17.18 + * of Intel Software Developer Manual: + * https://software.intel.com/en-us/articles/intel-sdm + * + * About Intel RDT kernel interface: * In Linux 4.10 kernel or newer, the interface is defined and exposed via * "resource control" filesystem, which is a "cgroup-like" interface. * @@ -37,59 +42,86 @@ import ( * interfaces in a container. But unlike cgroups' hierarchy, it has single level * filesystem layout. * + * CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via + * "resource control" filesystem. + * * Intel RDT "resource control" filesystem hierarchy: * mount -t resctrl resctrl /sys/fs/resctrl * tree /sys/fs/resctrl * /sys/fs/resctrl/ * |-- info * | |-- L3 - * | |-- cbm_mask - * | |-- min_cbm_bits + * | | |-- cbm_mask + * | | |-- min_cbm_bits + * | | |-- num_closids + * | |-- MB + * | |-- bandwidth_gran + * | |-- delay_linear + * | |-- min_bandwidth * | |-- num_closids - * |-- cpus + * |-- ... * |-- schemata * |-- tasks * |-- - * |-- cpus + * |-- ... * |-- schemata * |-- tasks * - * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache - * resource constraints. + * For runc, we can make use of `tasks` and `schemata` configuration for L3 + * cache and memory bandwidth resources constraints. * - * The file `tasks` has a list of tasks that belongs to this group (e.g., + * The file `tasks` has a list of tasks that belongs to this group (e.g., * " group). Tasks can be added to a group by writing the task ID - * to the "tasks" file (which will automatically remove them from the previous + * to the "tasks" file (which will automatically remove them from the previous * group to which they belonged). New tasks created by fork(2) and clone(2) are - * added to the same group as their parent. If a pid is not in any sub group, it is - * in root group. + * added to the same group as their parent. + * + * The file `schemata` has a list of all the resources available to this group. + * Each resource (L3 cache, memory bandwidth) has its own line and format. * - * The file `schemata` has allocation bitmasks/values for L3 cache on each socket, - * which contains L3 cache id and capacity bitmask (CBM). + * L3 cache schema: + * It has allocation bitmasks/values for L3 cache on each socket, which + * contains L3 cache id and capacity bitmask (CBM). * Format: "L3:=;=;..." - * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0` + * For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. * * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can * be set is less than the max bit. The max bits in the CBM is varied among - * supported Intel Xeon platforms. In Intel RDT "resource control" filesystem - * layout, the CBM in a group should be a subset of the CBM in root. Kernel will - * check if it is valid when writing. e.g., 0xfffff in root indicates the max bits - * of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM - * values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + * supported Intel CPU models. Kernel will check if it is valid when writing. + * e.g., default value 0xfffff in root indicates the max bits of CBM is 20 + * bits, which mapping to entire L3 cache capacity. Some valid CBM values to + * set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + * + * Memory bandwidth schema: + * It has allocation values for memory bandwidth on each socket, which contains + * L3 cache id and memory bandwidth percentage. + * Format: "MB:=bandwidth0;=bandwidth1;..." + * For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" + * + * The minimum bandwidth percentage value for each CPU model is predefined and + * can be looked up through "info/MB/min_bandwidth". The bandwidth granularity + * that is allocated is also dependent on the CPU model and can be looked up at + * "info/MB/bandwidth_gran". The available bandwidth control steps are: + * min_bw + N * bw_gran. Intermediate values are rounded to the next control + * step available on the hardware. * - * For more information about Intel RDT/CAT kernel interface: + * For more information about Intel RDT kernel interface: * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt * * An example for runc: * Consider a two-socket machine with two L3 caches where the default CBM is - * 0xfffff and the max CBM length is 20 bits. With this configuration, tasks - * inside the container only have access to the "upper" 80% of L3 cache id 0 and - * the "lower" 50% L3 cache id 1: + * 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% + * with a memory bandwidth granularity of 10%. + * + * Tasks inside the container only have access to the "upper" 7/11 of L3 cache + * on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a + * maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. * * "linux": { - * "intelRdt": { - * "l3CacheSchema": "L3:0=ffff0;1=3ff" + * "intelRdt": { + * "l3CacheSchema": "L3:0=7f0;1=1f", + * "memBwSchema": "MB:0=20;1=70" * } * } */ @@ -129,8 +161,10 @@ var ( intelRdtRoot string intelRdtRootLock sync.Mutex - // The flag to indicate if Intel RDT is supported - isEnabled bool + // The flag to indicate if Intel RDT/CAT is enabled + isCatEnabled bool + // The flag to indicate if Intel RDT/MBA is enabled + isMbaEnabled bool ) type intelRdtData struct { @@ -139,19 +173,22 @@ type intelRdtData struct { pid int } -// Check if Intel RDT is enabled in init() +// Check if Intel RDT sub-features are enabled in init() func init() { - // 1. Check if hardware and kernel support Intel RDT/CAT feature - // "cat_l3" flag is set if supported - isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo") - if !isFlagSet || err != nil { - isEnabled = false + // 1. Check if hardware and kernel support Intel RDT sub-features + // "cat_l3" flag for CAT and "mba" flag for MBA + isCatFlagSet, isMbaFlagSet, err := parseCpuInfoFile("/proc/cpuinfo") + if err != nil { + isCatEnabled = false + isMbaEnabled = false return } // 2. Check if Intel RDT "resource control" filesystem is mounted // The user guarantees to mount the filesystem - isEnabled = isIntelRdtMounted() + isFsMounted := isIntelRdtMounted() + isCatEnabled = isCatFlagSet && isFsMounted + isMbaEnabled = isMbaFlagSet && isFsMounted } // Return the mount point path of Intel RDT "resource control" filesysem @@ -223,30 +260,40 @@ func isIntelRdtMounted() bool { return true } -func parseCpuInfoFile(path string) (bool, error) { +func parseCpuInfoFile(path string) (bool, bool, error) { + isCatFlagSet := false + isMbaFlagSet := false + f, err := os.Open(path) if err != nil { - return false, err + return false, false, err } defer f.Close() s := bufio.NewScanner(f) for s.Scan() { if err := s.Err(); err != nil { - return false, err + return false, false, err } - text := s.Text() - flags := strings.Split(text, " ") - - // "cat_l3" flag is set if Intel RDT/CAT is supported - for _, flag := range flags { - if flag == "cat_l3" { - return true, nil + line := s.Text() + + // Search "cat_l3" and "mba" flags in first "flags" line + if strings.Contains(line, "flags") { + flags := strings.Split(line, " ") + // "cat_l3" flag for CAT and "mba" flag for MBA + for _, flag := range flags { + switch flag { + case "cat_l3": + isCatFlagSet = true + case "mba": + isMbaFlagSet = true + } } + return isCatFlagSet, isMbaFlagSet, nil } } - return false, nil + return isCatFlagSet, isMbaFlagSet, nil } func parseUint(s string, base, bitSize int) (uint64, error) { @@ -292,30 +339,6 @@ func getIntelRdtParamString(path, file string) (string, error) { return strings.TrimSpace(string(contents)), nil } -func readTasksFile(dir string) ([]int, error) { - f, err := os.Open(filepath.Join(dir, IntelRdtTasks)) - if err != nil { - return nil, err - } - defer f.Close() - - var ( - s = bufio.NewScanner(f) - out = []int{} - ) - - for s.Scan() { - if t := s.Text(); t != "" { - pid, err := strconv.Atoi(t) - if err != nil { - return nil, err - } - out = append(out, pid) - } - } - return out, nil -} - func writeFile(dir, file, data string) error { if dir == "" { return fmt.Errorf("no such directory for %s", file) @@ -368,6 +391,41 @@ func getL3CacheInfo() (*L3CacheInfo, error) { return l3CacheInfo, nil } +// Get the read-only memory bandwidth information +func getMemBwInfo() (*MemBwInfo, error) { + memBwInfo := &MemBwInfo{} + + rootPath, err := getIntelRdtRoot() + if err != nil { + return memBwInfo, err + } + + path := filepath.Join(rootPath, "info", "MB") + bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran") + if err != nil { + return memBwInfo, err + } + delayLinear, err := getIntelRdtParamUint(path, "delay_linear") + if err != nil { + return memBwInfo, err + } + minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth") + if err != nil { + return memBwInfo, err + } + numClosids, err := getIntelRdtParamUint(path, "num_closids") + if err != nil { + return memBwInfo, err + } + + memBwInfo.BandwidthGran = bandwidthGran + memBwInfo.DelayLinear = delayLinear + memBwInfo.MinBandwidth = minBandwidth + memBwInfo.NumClosids = numClosids + + return memBwInfo, nil +} + // WriteIntelRdtTasks writes the specified pid into the "tasks" file func WriteIntelRdtTasks(dir string, pid int) error { if dir == "" { @@ -383,9 +441,14 @@ func WriteIntelRdtTasks(dir string, pid int) error { return nil } -// Check if Intel RDT is enabled -func IsEnabled() bool { - return isEnabled +// Check if Intel RDT/CAT is enabled +func IsCatEnabled() bool { + return isCatEnabled +} + +// Check if Intel RDT/MBA is enabled +func IsMbaEnabled() bool { + return isMbaEnabled } // Get the 'container_id' path in Intel RDT "resource control" filesystem @@ -452,67 +515,132 @@ func (m *IntelRdtManager) GetStats() (*Stats, error) { defer m.mu.Unlock() stats := NewStats() - // The read-only L3 cache information - l3CacheInfo, err := getL3CacheInfo() - if err != nil { - return nil, err - } - stats.L3CacheInfo = l3CacheInfo - - // The read-only L3 cache schema in root rootPath, err := getIntelRdtRoot() if err != nil { return nil, err } + // The read-only L3 cache and memory bandwidth schemata in root tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata") if err != nil { return nil, err } - // L3 cache schema is in the first line schemaRootStrings := strings.Split(tmpRootStrings, "\n") - stats.L3CacheSchemaRoot = schemaRootStrings[0] - // The L3 cache schema in 'container_id' group + // The L3 cache and memory bandwidth schemata in 'container_id' group tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata") if err != nil { return nil, err } - // L3 cache schema is in the first line schemaStrings := strings.Split(tmpStrings, "\n") - stats.L3CacheSchema = schemaStrings[0] + + if IsCatEnabled() { + // The read-only L3 cache information + l3CacheInfo, err := getL3CacheInfo() + if err != nil { + return nil, err + } + stats.L3CacheInfo = l3CacheInfo + + // The read-only L3 cache schema in root + for _, schemaRoot := range schemaRootStrings { + if strings.Contains(schemaRoot, "L3") { + stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot) + } + } + + // The L3 cache schema in 'container_id' group + for _, schema := range schemaStrings { + if strings.Contains(schema, "L3") { + stats.L3CacheSchema = strings.TrimSpace(schema) + } + } + } + + if IsMbaEnabled() { + // The read-only memory bandwidth information + memBwInfo, err := getMemBwInfo() + if err != nil { + return nil, err + } + stats.MemBwInfo = memBwInfo + + // The read-only memory bandwidth information + for _, schemaRoot := range schemaRootStrings { + if strings.Contains(schemaRoot, "MB") { + stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot) + } + } + + // The memory bandwidth schema in 'container_id' group + for _, schema := range schemaStrings { + if strings.Contains(schema, "MB") { + stats.MemBwSchema = strings.TrimSpace(schema) + } + } + } return stats, nil } // Set Intel RDT "resource control" filesystem as configured. func (m *IntelRdtManager) Set(container *configs.Config) error { - path := m.GetPath() - - // About L3 cache schema file: - // The schema has allocation masks/values for L3 cache on each socket, + // About L3 cache schema: + // It has allocation bitmasks/values for L3 cache on each socket, // which contains L3 cache id and capacity bitmask (CBM). - // Format: "L3:=;=;..." - // For example, on a two-socket machine, L3's schema line could be: - // L3:0=ff;1=c0 - // Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + // Format: "L3:=;=;..." + // For example, on a two-socket machine, the schema line could be: + // L3:0=ff;1=c0 + // which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM + // is 0xc0. // - // About L3 cache CBM validity: // The valid L3 cache CBM is a *contiguous bits set* and number of // bits that can be set is less than the max bit. The max bits in the - // CBM is varied among supported Intel Xeon platforms. In Intel RDT - // "resource control" filesystem layout, the CBM in a group should - // be a subset of the CBM in root. Kernel will check if it is valid - // when writing. - // e.g., 0xfffff in root indicates the max bits of CBM is 20 bits, - // which mapping to entire L3 cache capacity. Some valid CBM values - // to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + // CBM is varied among supported Intel CPU models. Kernel will check + // if it is valid when writing. e.g., default value 0xfffff in root + // indicates the max bits of CBM is 20 bits, which mapping to entire + // L3 cache capacity. Some valid CBM values to set in a group: + // 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + // + // + // About memory bandwidth schema: + // It has allocation values for memory bandwidth on each socket, which + // contains L3 cache id and memory bandwidth percentage. + // Format: "MB:=bandwidth0;=bandwidth1;..." + // For example, on a two-socket machine, the schema line could be: + // "MB:0=20;1=70" + // + // The minimum bandwidth percentage value for each CPU model is + // predefined and can be looked up through "info/MB/min_bandwidth". + // The bandwidth granularity that is allocated is also dependent on + // the CPU model and can be looked up at "info/MB/bandwidth_gran". + // The available bandwidth control steps are: min_bw + N * bw_gran. + // Intermediate values are rounded to the next control step available + // on the hardware. if container.IntelRdt != nil { + path := m.GetPath() l3CacheSchema := container.IntelRdt.L3CacheSchema - if l3CacheSchema != "" { + memBwSchema := container.IntelRdt.MemBwSchema + + // Write a single joint schema string to schemata file + if l3CacheSchema != "" && memBwSchema != "" { + if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil { + return err + } + } + + // Write only L3 cache schema string to schemata file + if l3CacheSchema != "" && memBwSchema == "" { if err := writeFile(path, "schemata", l3CacheSchema); err != nil { return err } } + + // Write only memory bandwidth schema string to schemata file + if l3CacheSchema == "" && memBwSchema != "" { + if err := writeFile(path, "schemata", memBwSchema); err != nil { + return err + } + } } return nil diff --git a/libcontainer/intelrdt/stats.go b/libcontainer/intelrdt/stats.go index 095c0a380cd..df5686f3b80 100644 --- a/libcontainer/intelrdt/stats.go +++ b/libcontainer/intelrdt/stats.go @@ -8,6 +8,13 @@ type L3CacheInfo struct { NumClosids uint64 `json:"num_closids,omitempty"` } +type MemBwInfo struct { + BandwidthGran uint64 `json:"bandwidth_gran,omitempty"` + DelayLinear uint64 `json:"delay_linear,omitempty"` + MinBandwidth uint64 `json:"min_bandwidth,omitempty"` + NumClosids uint64 `json:"num_closids,omitempty"` +} + type Stats struct { // The read-only L3 cache information L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"` @@ -17,6 +24,15 @@ type Stats struct { // The L3 cache schema in 'container_id' group L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The read-only memory bandwidth information + MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"` + + // The read-only memory bandwidth schema in root + MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"` + + // The memory bandwidth schema in 'container_id' group + MemBwSchema string `json:"mem_bw_schema,omitempty"` } func NewStats() *Stats { diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 7951497efa3..320be1d8cb4 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -262,6 +262,9 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { if spec.Linux.IntelRdt.L3CacheSchema != "" { config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema } + if spec.Linux.IntelRdt.MemBwSchema != "" { + config.IntelRdt.MemBwSchema = spec.Linux.IntelRdt.MemBwSchema + } } return config, nil } diff --git a/update.go b/update.go index 9bb6b610207..1a81208f3b8 100644 --- a/update.go +++ b/update.go @@ -262,7 +262,7 @@ other options are ignored. // Update Intel RDT/CAT if val := context.String("l3-cache-schema"); val != "" { - if !intelrdt.IsEnabled() { + if !intelrdt.IsCatEnabled() { return fmt.Errorf("Intel RDT: l3 cache schema is not enabled") } diff --git a/utils_linux.go b/utils_linux.go index c6a3489737c..cb3def48c93 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -55,7 +55,7 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) { } intelRdtManager := libcontainer.IntelRdtFs - if !intelrdt.IsEnabled() { + if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() { intelRdtManager = nil }