Skip to content

Commit

Permalink
libcontainer: intelrdt: add support for Intel RDT/MBA in runc
Browse files Browse the repository at this point in the history
Memory Bandwidth Allocation (MBA) is a resource allocation sub-feature
of Intel Resource Director Technology (RDT) which is supported on some
Intel Xeon platforms. Intel RDT/MBA provides indirect and approximate
throttle over memory bandwidth for the software. A user controls the
resource by indicating the percentage of maximum memory bandwidth.

Hardware details of Intel RDT/MBA can be found in section 17.18 of
Intel Software Developer Manual:
https://software.intel.com/en-us/articles/intel-sdm

In Linux 4.12 kernel and newer, Intel RDT/MBA is enabled by kernel
config CONFIG_INTEL_RDT. If hardware support, CPU flags `rdt_a` and
`mba` will be set in /proc/cpuinfo.

Intel RDT "resource control" filesystem hierarchy:
mount -t resctrl resctrl /sys/fs/resctrl
tree /sys/fs/resctrl
/sys/fs/resctrl/
|-- info
|   |-- L3
|   |   |-- cbm_mask
|   |   |-- min_cbm_bits
|   |   |-- num_closids
|   |-- MB
|       |-- bandwidth_gran
|       |-- delay_linear
|       |-- min_bandwidth
|       |-- num_closids
|-- ...
|-- schemata
|-- tasks
|-- <container_id>
    |-- ...
    |-- schemata
    |-- tasks

For MBA support for `runc`, we will reuse the infrastructure and code
base of Intel RDT/CAT which implemented in opencontainers#1279. We could also make
use of `tasks` and `schemata` configuration for memory bandwidth
resource constraints.

The file `tasks` has a list of tasks that belongs to this group (e.g.,
<container_id>" group). Tasks can be added to a group by writing the
task ID to the "tasks" file (which will automatically remove them from
the previous group to which they belonged). New tasks created by
fork(2) and clone(2) are added to the same group as their parent.

The file `schemata` has a list of all the resources available to this
group. Each resource (L3 cache, memory bandwidth) has its own line and
format.

Memory bandwidth schema:
It has allocation values for memory bandwidth on each socket, which
contains L3 cache id and memory bandwidth percentage.
    Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."

The minimum bandwidth percentage value for each CPU model is predefined
and can be looked up through "info/MB/min_bandwidth". The bandwidth
granularity that is allocated is also dependent on the CPU model and
can be looked up at "info/MB/bandwidth_gran". The available bandwidth
control steps are: min_bw + N * bw_gran. Intermediate values are
rounded to the next control step available on the hardware.

For more information about Intel RDT kernel interface:
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt

An example for runc:
Consider a two-socket machine with two L3 caches where the minimum
memory bandwidth of 10% with a memory bandwidth granularity of 10%.
Tasks inside the container may use a maximum memory bandwidth of 20%
on socket 0 and 70% on socket 1.

"linux": {
    "intelRdt": {
        "memBwSchema": "MB:0=20;1=70"
    }
}

Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
  • Loading branch information
xiaochenshen committed Sep 5, 2018
1 parent 90355f0 commit 651344d
Show file tree
Hide file tree
Showing 10 changed files with 319 additions and 125 deletions.
38 changes: 35 additions & 3 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ type l3CacheInfo struct {
NumClosids uint64 `json:"num_closids,omitempty"`
}

type memBwInfo struct {
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
DelayLinear uint64 `json:"delay_linear,omitempty"`
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
NumClosids uint64 `json:"num_closids,omitempty"`
}

type intelRdt struct {
// The read-only L3 cache information
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
Expand All @@ -113,6 +120,15 @@ type intelRdt struct {

// The L3 cache schema in 'container_id' group
L3CacheSchema string `json:"l3_cache_schema,omitempty"`

// The read-only memory bandwidth information
MemBwInfo *memBwInfo `json:"mem_bw_info,omitempty"`

// The read-only memory bandwidth schema in root
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`

// The memory bandwidth schema in 'container_id' group
MemBwSchema string `json:"mem_bw_schema,omitempty"`
}

var eventsCommand = cli.Command{
Expand Down Expand Up @@ -248,9 +264,16 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
}

if is := ls.IntelRdtStats; is != nil {
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
if intelrdt.IsCatEnabled() {
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
}
if intelrdt.IsMbaEnabled() {
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
s.IntelRdt.MemBwSchema = is.MemBwSchema
}
}

return &s
Expand Down Expand Up @@ -293,3 +316,12 @@ func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
NumClosids: i.NumClosids,
}
}

func convertMemBwInfo(i *intelrdt.MemBwInfo) *memBwInfo {
return &memBwInfo{
BandwidthGran: i.BandwidthGran,
DelayLinear: i.DelayLinear,
MinBandwidth: i.MinBandwidth,
NumClosids: i.NumClosids,
}
}
4 changes: 2 additions & 2 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ type Config struct {
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`

// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
// to limit the resources (e.g., L3 cache) the container has available
// IntelRdt specifies settings for Intel RDT group that the container is placed into
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
}

Expand Down
4 changes: 4 additions & 0 deletions libcontainer/configs/intelrdt.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ type IntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`

// The schema of memory bandwidth percentage per L3 cache id
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
MemBwSchema string `json:"memBwSchema,omitempty"`
}
19 changes: 15 additions & 4 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,22 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {

func (v *ConfigValidator) intelrdt(config *configs.Config) error {
if config.IntelRdt != nil {
if !intelrdt.IsEnabled() {
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled")
}
if config.IntelRdt.L3CacheSchema == "" {
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")

if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
}
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
}

if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
}
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
}
}

Expand Down
6 changes: 3 additions & 3 deletions libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func RootlessCgroupfs(l *LinuxFactory) error {

// IntelRdtfs is an options func to configure a LinuxFactory to return
// containers that use the Intel RDT "resource control" filesystem to
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
func IntelRdtFs(l *LinuxFactory) error {
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
return &intelrdt.IntelRdtManager{
Expand Down Expand Up @@ -218,7 +218,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
if intelrdt.IsEnabled() {
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
c.state = &stoppedState{c: c}
Expand Down Expand Up @@ -257,7 +257,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if err := c.refreshState(); err != nil {
return nil, err
}
if intelrdt.IsEnabled() {
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
}
return c, nil
Expand Down
Loading

0 comments on commit 651344d

Please sign in to comment.