diff --git a/events.go b/events.go
index 8305bb7cbf0..6901c5d3ac3 100644
--- a/events.go
+++ b/events.go
@@ -24,11 +24,12 @@ type event struct {
 
 // stats is the runc specific stats structure for stability when encoding and decoding stats.
 type stats struct {
-	Cpu     cpu                `json:"cpu"`
-	Memory  memory             `json:"memory"`
-	Pids    pids               `json:"pids"`
-	Blkio   blkio              `json:"blkio"`
-	Hugetlb map[string]hugetlb `json:"hugetlb"`
+	Cpu      cpu                `json:"cpu"`
+	Memory   memory             `json:"memory"`
+	Pids     pids               `json:"pids"`
+	Blkio    blkio              `json:"blkio"`
+	Hugetlb  map[string]hugetlb `json:"hugetlb"`
+	IntelRdt intelRdt           `json:"intelRdt"`
 }
 
 type hugetlb struct {
@@ -95,6 +96,12 @@ type memory struct {
 	Raw       map[string]uint64 `json:"raw,omitempty"`
 }
 
+type intelRdt struct {
+	// The read-only default "schema" in root, for reference
+	L3CacheSchemaRoot string `json:"l3CacheSchemaRoot,omitempty"`
+	L3CacheSchema     string `json:"l3CacheSchema,omitempty"`
+}
+
 var eventsCommand = cli.Command{
 	Name:  "events",
 	Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
@@ -226,6 +233,14 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
 	for k, v := range cg.HugetlbStats {
 		s.Hugetlb[k] = convertHugtlb(v)
 	}
+
+	is := ls.IntelRdtStats
+	if is == nil {
+		return &s
+	}
+	s.IntelRdt.L3CacheSchemaRoot = is.IntelRdtRootStats.L3CacheSchema
+	s.IntelRdt.L3CacheSchema = is.IntelRdtStats.L3CacheSchema
+
 	return &s
 }
 
diff --git a/libcontainer/configs/cgroup_unix.go b/libcontainer/configs/cgroup_unix.go
index 14d62898162..77a7ad5c56b 100644
--- a/libcontainer/configs/cgroup_unix.go
+++ b/libcontainer/configs/cgroup_unix.go
@@ -121,4 +121,8 @@ type Resources struct {
 
 	// Set class identifier for container's network packets
 	NetClsClassid uint32 `json:"net_cls_classid_u"`
+
+	// Intel RDT: the schema for L3 cache id and capacity bitmask (CBM)
+	// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+	IntelRdtL3CacheSchema string `json:"intel_rdt_l3_cache_schema"`
 }
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 3ce5a596014..ef44ed706fc 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -22,6 +22,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/criurpc"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/resourcemanager"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/utils"
@@ -62,6 +63,9 @@ type State struct {
 
 	// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
 	ExternalDescriptors []string `json:"external_descriptors,omitempty"`
+
+	// Intel RDT "resource control" filesystem path
+	IntelRdtPath string `json:"intel_rdt_path"`
 }
 
 // Container is a libcontainer container object.
@@ -160,6 +164,13 @@ func (c *linuxContainer) Stats() (*Stats, error) {
 	if err != nil {
 		return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
 	}
+	if intelRdtManager, ok := c.resourceManagers["intelrdt"]; ok {
+		intelRdtStats, err := intelRdtManager.GetStats()
+		if err != nil {
+			return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
+		}
+		stats.IntelRdtStats = intelRdtStats.(*intelrdt.Stats)
+	}
 	for _, iface := range c.config.Networks {
 		switch iface.Type {
 		case "veth":
@@ -380,11 +391,16 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
 	if err != nil {
 		return nil, err
 	}
+	intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
+	if err != nil {
+		intelRdtPath = ""
+	}
 	// TODO: set on container for process management
 	p.consoleChan = make(chan *os.File, 1)
 	return &setnsProcess{
 		cmd:           cmd,
 		cgroupPaths:   c.resourceManagers["cgroups"].GetPaths(),
+		intelRdtPath:  intelRdtPath,
 		childPipe:     childPipe,
 		parentPipe:    parentPipe,
 		config:        c.newInitConfig(p),
@@ -1254,6 +1270,10 @@ func (c *linuxContainer) currentState() (*State, error) {
 		startTime, _ = c.initProcess.startTime()
 		externalDescriptors = c.initProcess.externalDescriptors()
 	}
+	intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
+	if err != nil {
+		intelRdtPath = ""
+	}
 	state := &State{
 		BaseState: BaseState{
 			ID:                   c.ID(),
@@ -1263,6 +1283,7 @@ func (c *linuxContainer) currentState() (*State, error) {
 			Created:              c.created,
 		},
 		CgroupPaths:         c.resourceManagers["cgroups"].GetPaths(),
+		IntelRdtPath:        intelRdtPath,
 		NamespacePaths:      make(map[configs.NamespaceType]string),
 		ExternalDescriptors: externalDescriptors,
 	}
diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go
index 24c58787b4a..8b2c24e3eeb 100644
--- a/libcontainer/container_linux_test.go
+++ b/libcontainer/container_linux_test.go
@@ -9,6 +9,7 @@ import (
 
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/resourcemanager"
 )
 
@@ -19,6 +20,13 @@ type mockCgroupManager struct {
 	paths   map[string]string
 }
 
+type mockIntelRdtManager struct {
+	pids    []int
+	allPids []int
+	stats   *intelrdt.Stats
+	path    string
+}
+
 func (m *mockCgroupManager) GetPids() ([]int, error) {
 	return m.pids, nil
 }
@@ -51,6 +59,40 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
 	return nil
 }
 
+func (m *mockIntelRdtManager) GetPids() ([]int, error) {
+	return m.pids, nil
+}
+
+func (m *mockIntelRdtManager) GetAllPids() ([]int, error) {
+	return m.allPids, nil
+}
+
+func (m *mockIntelRdtManager) GetStats() (interface{}, error) {
+	return m.stats, nil
+}
+
+func (m *mockIntelRdtManager) Apply(pid int) error {
+	return nil
+}
+
+func (m *mockIntelRdtManager) Set(container *configs.Config) error {
+	return nil
+}
+
+func (m *mockIntelRdtManager) Destroy() error {
+	return nil
+}
+
+func (m *mockIntelRdtManager) GetPaths() map[string]string {
+	paths := make(map[string]string)
+	paths["intelrdt"] = m.path
+	return paths
+}
+
+func (m *mockIntelRdtManager) Freeze(state configs.FreezerState) error {
+	return nil
+}
+
 type mockProcess struct {
 	_pid    int
 	started string
@@ -121,6 +163,14 @@ func TestGetContainerStats(t *testing.T) {
 			},
 		},
 	}
+	container.resourceManagers["intelrdt"] = &mockIntelRdtManager{
+		pids: []int{1, 2, 3},
+		stats: &intelrdt.Stats{
+			IntelRdtStats: intelrdt.IntelRdtStats{
+				L3CacheSchema: "L3:0=ffff0;1=fff00",
+			},
+		},
+	}
 	stats, err := container.Stats()
 	if err != nil {
 		t.Fatal(err)
@@ -131,13 +181,22 @@ func TestGetContainerStats(t *testing.T) {
 	if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
 		t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
 	}
+	if intelrdt.IsIntelRdtEnabled() {
+		if stats.IntelRdtStats == nil {
+			t.Fatal("intel rdt stats are nil")
+		}
+		if stats.IntelRdtStats.IntelRdtStats.L3CacheSchema != "L3:0=ffff0;1=fff00" {
+			t.Fatalf("expected L3CacheSchema L3:0=ffff0;1=fff00 but recevied %s", stats.IntelRdtStats.IntelRdtStats.L3CacheSchema)
+		}
+	}
 }
 
 func TestGetContainerState(t *testing.T) {
 	var (
-		pid                 = os.Getpid()
-		expectedMemoryPath  = "/sys/fs/cgroup/memory/myid"
-		expectedNetworkPath = "/networks/fd"
+		pid                  = os.Getpid()
+		expectedMemoryPath   = "/sys/fs/cgroup/memory/myid"
+		expectedNetworkPath  = "/networks/fd"
+		expectedIntelRdtPath = "/sys/fs/resctrl/myid"
 	)
 	container := &linuxContainer{
 		id: "myid",
@@ -170,6 +229,15 @@ func TestGetContainerState(t *testing.T) {
 			"memory": expectedMemoryPath,
 		},
 	}
+	container.resourceManagers["intelrdt"] = &mockIntelRdtManager{
+		pids: []int{1, 2, 3},
+		stats: &intelrdt.Stats{
+			IntelRdtStats: intelrdt.IntelRdtStats{
+				L3CacheSchema: "L3:0=ffff0;1=fff00",
+			},
+		},
+		path: expectedIntelRdtPath,
+	}
 	container.state = &createdState{c: container}
 	state, err := container.State()
 	if err != nil {
@@ -188,6 +256,15 @@ func TestGetContainerState(t *testing.T) {
 	if memPath := paths["memory"]; memPath != expectedMemoryPath {
 		t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
 	}
+	if intelrdt.IsIntelRdtEnabled() {
+		path := state.IntelRdtPath
+		if path == "" {
+			t.Fatal("intel rdt path should not be empty")
+		}
+		if intelRdtPath := path; intelRdtPath != expectedIntelRdtPath {
+			t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath)
+		}
+	}
 	for _, ns := range container.config.Namespaces {
 		path := state.NamespacePaths[ns.Type]
 		if path == "" {
diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go
index 5fac3620163..1ecf61fcc23 100644
--- a/libcontainer/factory_linux.go
+++ b/libcontainer/factory_linux.go
@@ -18,6 +18,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/configs/validate"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/resourcemanager"
 	"github.com/opencontainers/runc/libcontainer/utils"
 )
@@ -66,6 +67,19 @@ func Cgroupfs(l *LinuxFactory) error {
 	return nil
 }
 
+// IntelRdtFs is an option func to configure a LinuxFactory to return
+// containers that use the Intel RDT "resource control" filesystem to
+// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
+func IntelRdtFs(l *LinuxFactory) error {
+	l.NewIntelRdtManager = func(config *configs.Config, id string) intelrdt.Manager {
+		return &intelrdt.IntelRdtManager{
+			Config: config,
+			Id:     id,
+		}
+	}
+	return nil
+}
+
 // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
 func TmpfsRoot(l *LinuxFactory) error {
 	mounted, err := mount.Mounted(l.Root)
@@ -130,6 +144,9 @@ type LinuxFactory struct {
 
 	// NewCgroupsManager returns an initialized cgroups manager for a single container.
 	NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
+
+	// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
+	NewIntelRdtManager func(config *configs.Config, id string) intelrdt.Manager
 }
 
 func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@@ -181,6 +198,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
 	}
 	resourceManagers := make(map[string]resourcemanager.ResourceManager)
 	resourceManagers["cgroups"] = l.NewCgroupsManager(config.Cgroups, nil)
+	if l.NewIntelRdtManager != nil && intelrdt.IsIntelRdtEnabled() {
+		resourceManagers["intelrdt"] = l.NewIntelRdtManager(config, id)
+	}
 	c.resourceManagers = resourceManagers
 	c.state = &stoppedState{c: c}
 	return c, nil
@@ -212,6 +232,9 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
 	}
 	resourceManagers := make(map[string]resourcemanager.ResourceManager)
 	resourceManagers["cgroups"] = l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths)
+	if l.NewIntelRdtManager != nil && intelrdt.IsIntelRdtEnabled() {
+		resourceManagers["intelrdt"] = l.NewIntelRdtManager(&state.Config, id)
+	}
 	c.resourceManagers = resourceManagers
 	c.state = &loadedState{c: c}
 	if err := c.refreshState(); err != nil {
diff --git a/libcontainer/factory_linux_test.go b/libcontainer/factory_linux_test.go
index ea3b5132d77..53593934573 100644
--- a/libcontainer/factory_linux_test.go
+++ b/libcontainer/factory_linux_test.go
@@ -49,6 +49,32 @@ func TestFactoryNew(t *testing.T) {
 	}
 }
 
+func TestFactoryNewIntelRdt(t *testing.T) {
+	root, rerr := newTestRoot()
+	if rerr != nil {
+		t.Fatal(rerr)
+	}
+	defer os.RemoveAll(root)
+	factory, err := New(root, Cgroupfs, IntelRdtFs)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if factory == nil {
+		t.Fatal("factory should not be nil")
+	}
+	lfactory, ok := factory.(*LinuxFactory)
+	if !ok {
+		t.Fatal("expected linux factory returned on linux based systems")
+	}
+	if lfactory.Root != root {
+		t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
+	}
+
+	if factory.Type() != "libcontainer" {
+		t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
+	}
+}
+
 func TestFactoryNewTmpfs(t *testing.T) {
 	root, rerr := newTestRoot()
 	if rerr != nil {
@@ -163,7 +189,7 @@ func TestFactoryLoadContainer(t *testing.T) {
 	if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
 		t.Fatal(err)
 	}
-	factory, err := New(root, Cgroupfs)
+	factory, err := New(root, Cgroupfs, IntelRdtFs)
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/libcontainer/intelrdt/intelrdt.go b/libcontainer/intelrdt/intelrdt.go
new file mode 100644
index 00000000000..aae0ebdc427
--- /dev/null
+++ b/libcontainer/intelrdt/intelrdt.go
@@ -0,0 +1,563 @@
+// +build linux
+
+package intelrdt
+
+import (
+	"bufio"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/resourcemanager"
+)
+
+/*
+ * About Intel RDT/CAT feature:
+ * Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
+ * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
+ * Cache is the only resource that is supported in RDT.
+ *
+ * This feature provides a way for the software to restrict cache allocation to a
+ * defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
+ * The different subsets are identified by class of service (CLOS) and each CLOS
+ * has a capacity bitmask (CBM).
+ *
+ * For more information about Intel RDT/CAT can be found in the section 17.17
+ * of Intel Software Developer Manual.
+ *
+ * About Intel RDT/CAT kernel interface:
+ * In Linux kernel, the interface is defined and exposed via "resource control"
+ * filesystem, which is a "cgroup-like" interface.
+ *
+ * Comparing with cgroups, it has similar process management lifecycle and
+ * interfaces in a container. But unlike cgroups' hierarchy, it has single level
+ * filesystem layout.
+ *
+ * Intel RDT "resource control" filesystem hierarchy:
+ * mount -t resctrl resctrl /sys/fs/resctrl
+ * tree /sys/fs/resctrl
+ * /sys/fs/resctrl/
+ * |-- info
+ * |   |-- L3
+ * |       |-- cbm_mask
+ * |       |-- min_cbm_bits
+ * |       |-- num_closids
+ * |-- cpus
+ * |-- schemata
+ * |-- tasks
+ * |-- <container_id>
+ *     |-- cpus
+ *     |-- schemata
+ *     |-- tasks
+ *
+ * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
+ * resource constraints.
+ *
+ * The file `tasks` has a list of tasks that belongs to this group (e.g.,
+ * "<container_id>" group). Tasks can be added to a group by writing the task ID
+ * to the "tasks" file (which will automatically remove them from the previous
+ * group to which they belonged). New tasks created by fork(2) and clone(2) are
+ * added to the same group as their parent. If a pid is not in any sub group, it is
+ * in root group.
+ *
+ * The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
+ * which contains L3 cache id and capacity bitmask (CBM).
+ * Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+ * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
+ * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
+ *
+ * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
+ * be set is less than the max bit. The max bits in the CBM is varied among
+ * supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
+ * layout, the CBM in a group should be a subset of the CBM in root. Kernel will
+ * check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
+ * of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
+ * values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
+ *
+ * For more information about Intel RDT/CAT kernel interface:
+ * https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=x86/cache&id=f20e57892806ad244eaec7a7ae365e78fee53377
+ *
+ * An example for runc:
+ * There are two L3 caches in the two-socket machine, the default CBM is 0xfffff
+ * and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache
+ * id 0 and the whole L3 cache id 1 for the container:
+ *
+ * "linux": {
+ * 	"resources": {
+ * 		"intelRdt": {
+ * 			"l3CacheSchema": "L3:0=ffff0;1=fffff"
+ * 		}
+ * 	}
+ * }
+ */
+
+type Manager interface {
+	resourcemanager.ResourceManager
+
+	// Returns Intel RDT "resource control" filesystem path to save in
+	// a state file and to be able to restore the object later
+	GetPath() string
+}
+
+// This implements interface Manager
+type IntelRdtManager struct {
+	mu     sync.Mutex
+	Config *configs.Config
+	Id     string
+	Path   string
+}
+
+const (
+	IntelRdtTasks = "tasks"
+)
+
+var (
+	// The absolute path to the root of the Intel RDT "resource control" filesystem
+	intelRdtRootLock sync.Mutex
+	intelRdtRoot     string
+)
+
+// The read-only Intel RDT related system information in root
+type IntelRdtInfo struct {
+	CbmMask    uint64 `json:"cbm_mask,omitempty"`
+	MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
+	NumClosid  uint64 `json:"num_closid,omitempty"`
+}
+
+type intelRdtData struct {
+	root   string
+	config *configs.Config
+	pid    int
+}
+
+// Return the mount point path of Intel RDT "resource control" filesystem
+func findIntelRdtMountpointDir() (string, error) {
+	f, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		text := s.Text()
+		fields := strings.Split(text, " ")
+		// Safe as mountinfo encodes mountpoints with spaces as \040.
+		index := strings.Index(text, " - ")
+		postSeparatorFields := strings.Fields(text[index+3:])
+		numPostFields := len(postSeparatorFields)
+
+		// This is an error as we can't detect if the mount is for "Intel RDT"
+		if numPostFields == 0 {
+			return "", fmt.Errorf("Found no fields post '-' in %q", text)
+		}
+
+		if postSeparatorFields[0] == "resctrl" {
+			// Check that the mount is properly formatted.
+			if numPostFields < 3 {
+				return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+			}
+
+			return fields[4], nil
+		}
+	}
+	if err := s.Err(); err != nil {
+		return "", err
+	}
+
+	return "", NewNotFoundError("Intel RDT")
+}
+
+// Gets the root path of Intel RDT "resource control" filesystem
+func getIntelRdtRoot() (string, error) {
+	intelRdtRootLock.Lock()
+	defer intelRdtRootLock.Unlock()
+
+	if intelRdtRoot != "" {
+		return intelRdtRoot, nil
+	}
+
+	root, err := findIntelRdtMountpointDir()
+	if err != nil {
+		return "", err
+	}
+
+	if _, err := os.Stat(root); err != nil {
+		return "", err
+	}
+
+	intelRdtRoot = root
+	return intelRdtRoot, nil
+}
+
+func isIntelRdtMounted() bool {
+	_, err := getIntelRdtRoot()
+	if err != nil {
+		if !IsNotFound(err) {
+			return false
+		}
+
+		// If not mounted, we try to mount again:
+		// mount -t resctrl resctrl /sys/fs/resctrl
+		if err := os.MkdirAll("/sys/fs/resctrl", 0755); err != nil {
+			return false
+		}
+		if err := exec.Command("mount", "-t", "resctrl", "resctrl", "/sys/fs/resctrl").Run(); err != nil {
+			return false
+		}
+	}
+
+	return true
+}
+
+func parseCpuInfoFile(path string) (bool, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return false, err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		text := s.Text()
+		flags := strings.Split(text, " ")
+
+		for _, flag := range flags {
+			if flag == "rdt_a" {
+				return true, nil
+			}
+		}
+	}
+	// A read error only becomes visible once Scan has returned false.
+	if err := s.Err(); err != nil {
+		return false, err
+	}
+	return false, nil
+}
+
+func parseUint(s string, base, bitSize int) (uint64, error) {
+	value, err := strconv.ParseUint(s, base, bitSize)
+	if err != nil {
+		intValue, intErr := strconv.ParseInt(s, base, bitSize)
+		// 1. Handle negative values greater than MinInt64 (and)
+		// 2. Handle negative values lesser than MinInt64
+		if intErr == nil && intValue < 0 {
+			return 0, nil
+		} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
+			return 0, nil
+		}
+
+		return value, err
+	}
+
+	return value, nil
+}
+
+// Gets a single uint64 value from the specified file.
+func getIntelRdtParamUint(path, file string) (uint64, error) {
+	fileName := filepath.Join(path, file)
+	contents, err := ioutil.ReadFile(fileName)
+	if err != nil {
+		return 0, err
+	}
+
+	res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
+	if err != nil {
+		return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
+	}
+	return res, nil
+}
+
+// Gets a string value from the specified file
+func getIntelRdtParamString(path, file string) (string, error) {
+	contents, err := ioutil.ReadFile(filepath.Join(path, file))
+	if err != nil {
+		return "", err
+	}
+
+	return strings.TrimSpace(string(contents)), nil
+}
+
+func readTasksFile(dir string) ([]int, error) {
+	f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var (
+		s   = bufio.NewScanner(f)
+		out = []int{}
+	)
+
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			pid, err := strconv.Atoi(t)
+			if err != nil {
+				return nil, err
+			}
+			out = append(out, pid)
+		}
+	}
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func writeFile(dir, file, data string) error {
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", file)
+	}
+	if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
+		return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
+	}
+	return nil
+}
+
+func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
+	rootPath, err := getIntelRdtRoot()
+	if err != nil {
+		return nil, err
+	}
+	return &intelRdtData{
+		root:   rootPath,
+		config: c,
+		pid:    pid,
+	}, nil
+}
+
+// WriteIntelRdtTasks writes the specified pid into the "tasks" file
+func WriteIntelRdtTasks(dir string, pid int) error {
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", IntelRdtTasks)
+	}
+
+	// Dont attach any pid if -1 is specified as a pid
+	if pid != -1 {
+		if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
+			return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
+		}
+	}
+	return nil
+}
+
+// Check if Intel RDT is enabled
+func IsIntelRdtEnabled() bool {
+	// 1. check if hardware and kernel support Intel RDT feature
+	// "rdt_a" flag is set if supported
+	isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
+	if err != nil {
+		return false
+	}
+
+	// 2. check if Intel RDT "resource control" filesystem is mounted
+	isMounted := isIntelRdtMounted()
+
+	return isFlagSet && isMounted
+}
+
+// Get Intel RDT "resource control" filesystem path
+func GetIntelRdtPath(id string) (string, error) {
+	rootPath, err := getIntelRdtRoot()
+	if err != nil {
+		return "", err
+	}
+
+	path := filepath.Join(rootPath, id)
+	return path, nil
+}
+
+// Get read-only Intel RDT related system information
+func GetIntelRdtInfo() (*IntelRdtInfo, error) {
+	intelRdtInfo := &IntelRdtInfo{}
+
+	rootPath, err := getIntelRdtRoot()
+	if err != nil {
+		return nil, err
+	}
+
+	path := filepath.Join(rootPath, "info", "L3")
+	cbmMask, err := getIntelRdtParamUint(path, "cbm_mask")
+	if err != nil {
+		return nil, err
+	}
+	minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
+	if err != nil {
+		return nil, err
+	}
+	numClosid, err := getIntelRdtParamUint(path, "num_closids")
+	if err != nil {
+		return nil, err
+	}
+
+	intelRdtInfo.CbmMask = cbmMask
+	intelRdtInfo.MinCbmBits = minCbmBits
+	intelRdtInfo.NumClosid = numClosid
+
+	return intelRdtInfo, nil
+}
+
+// Applies configuration to the process with the specified pid
+func (m *IntelRdtManager) Apply(pid int) (err error) {
+	d, err := getIntelRdtData(m.Config, pid)
+	// d is nil whenever err is non-nil (including the not-found case), so
+	// every error has to stop here instead of falling through to d.join.
+	if err != nil {
+		return err
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	path, err := d.join(m.Id)
+	if err != nil {
+		return err
+	}
+
+	m.Path = path
+	return nil
+}
+
+// Returns the PIDs inside Intel RDT "resource control" filesystem at path
+func (m *IntelRdtManager) GetPids() ([]int, error) {
+	return readTasksFile(m.GetPath())
+}
+
+// Returns all the PIDs inside Intel RDT "resource control" filesystem at path
+func (m *IntelRdtManager) GetAllPids() ([]int, error) {
+	return m.GetPids()
+}
+
+// Freeze is a no-op here; it ignores the requested state and returns nil
+func (m *IntelRdtManager) Freeze(state configs.FreezerState) error {
+	return nil
+}
+
+// Destroys the Intel RDT "resource control" filesystem
+func (m *IntelRdtManager) Destroy() error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if err := os.RemoveAll(m.Path); err != nil {
+		return err
+	}
+	m.Path = ""
+	return nil
+}
+
+// Returns Intel RDT "resource control" filesystem paths to save in
+// a state file and to be able to restore the object later
+func (m *IntelRdtManager) GetPaths() map[string]string {
+	m.mu.Lock()
+	paths := make(map[string]string)
+	paths["intelrdt"] = m.Path
+	m.mu.Unlock()
+	return paths
+}
+
+// Returns Intel RDT "resource control" filesystem path to save in
+// a state file and to be able to restore the object later
+func (m *IntelRdtManager) GetPath() string {
+	if m.Path == "" {
+		m.Path, _ = GetIntelRdtPath(m.Id)
+	}
+	return m.Path
+}
+
+// Returns statistics for Intel RDT
+func (m *IntelRdtManager) GetStats() (interface{}, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	stats := NewStats()
+
+	// The read-only default "schemata" in root, for reference
+	rootPath, err := getIntelRdtRoot()
+	if err != nil {
+		return nil, err
+	}
+	schemaRoot, err := getIntelRdtParamString(rootPath, "schemata")
+	if err != nil {
+		return nil, err
+	}
+	stats.IntelRdtRootStats.L3CacheSchema = schemaRoot
+
+	// The stats in "container_id" group
+	schema, err := getIntelRdtParamString(m.GetPath(), "schemata")
+	if err != nil {
+		return nil, err
+	}
+	stats.IntelRdtStats.L3CacheSchema = schema
+
+	return stats, nil
+}
+
+// Set Intel RDT "resource control" filesystem as configured.
+func (m *IntelRdtManager) Set(container *configs.Config) error {
+	path := m.GetPath()
+
+	// About L3 cache schema file:
+	// The schema has allocation masks/values for L3 cache on each socket,
+	// which contains L3 cache id and capacity bitmask (CBM).
+	//     Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+	// For example, on a two-socket machine, L3's schema line could be:
+	//     L3:0=ff;1=c0
+	// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
+	//
+	// About L3 cache CBM validity:
+	// The valid L3 cache CBM is a *contiguous bits set* and number of
+	// bits that can be set is less than the max bit. The max bits in the
+	// CBM is varied among supported Intel Xeon platforms. In Intel RDT
+	// "resource control" filesystem layout, the CBM in a group should
+	// be a subset of the CBM in root. Kernel will check if it is valid
+	// when writing.
+	// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
+	// which mapping to entire L3 cache capacity. Some valid CBM values
+	// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
+	l3CacheSchema := container.Cgroups.Resources.IntelRdtL3CacheSchema
+	if l3CacheSchema != "" {
+		if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (raw *intelRdtData) join(id string) (string, error) {
+	path := filepath.Join(raw.root, id)
+	if err := os.MkdirAll(path, 0755); err != nil {
+		return "", err
+	}
+
+	if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
+		return "", err
+	}
+	return path, nil
+}
+
+type NotFoundError struct {
+	ResourceControl string
+}
+
+func (e *NotFoundError) Error() string {
+	return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
+}
+
+func NewNotFoundError(res string) error {
+	return &NotFoundError{
+		ResourceControl: res,
+	}
+}
+
+func IsNotFound(err error) bool {
+	if err == nil {
+		return false
+	}
+	_, ok := err.(*NotFoundError)
+	return ok
+}
diff --git a/libcontainer/intelrdt/intelrdt_test.go b/libcontainer/intelrdt/intelrdt_test.go
new file mode 100644
index 00000000000..2a03781c345
--- /dev/null
+++ b/libcontainer/intelrdt/intelrdt_test.go
@@ -0,0 +1,43 @@
+// +build linux
+
+package intelrdt
+
+import (
+	"testing"
+)
+
+func TestIntelRdtSetL3CacheSchema(t *testing.T) {
+	if !IsIntelRdtEnabled() {
+		t.Skip("Intel RDT is not enabled")
+	}
+
+	helper := NewIntelRdtTestUtil(t)
+	defer helper.cleanup()
+
+	const (
+		l3CacheSchemaBefore = "L3:0=f;1=f0"
+		l3CacheSchemeAfter  = "L3:0=f0;1=f"
+	)
+
+	helper.writeFileContents(map[string]string{
+		"schemata": l3CacheSchemaBefore + "\n",
+	})
+
+	helper.IntelRdtData.config.Cgroups.Resources.IntelRdtL3CacheSchema = l3CacheSchemeAfter
+	intelrdt := &IntelRdtManager{
+		Config: helper.IntelRdtData.config,
+		Path:   helper.IntelRdtPath,
+	}
+	if err := intelrdt.Set(helper.IntelRdtData.config); err != nil {
+		t.Fatal(err)
+	}
+
+	value, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
+	if err != nil {
+		t.Fatalf("Failed to parse file 'schemata' - %s", err)
+	}
+
+	if value != l3CacheSchemeAfter {
+		t.Fatal("Got the wrong value, set 'schemata' failed.")
+	}
+}
diff --git a/libcontainer/intelrdt/stats.go b/libcontainer/intelrdt/stats.go
new file mode 100644
index 00000000000..a2412d9cb7f
--- /dev/null
+++ b/libcontainer/intelrdt/stats.go
@@ -0,0 +1,20 @@
+// +build linux
+
+package intelrdt
+
+type IntelRdtRootStats struct {
+	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+}
+
+type IntelRdtStats struct {
+	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+}
+
+type Stats struct {
+	IntelRdtRootStats IntelRdtRootStats `json:"intel_rdt_root_stats,omitempty"`
+	IntelRdtStats     IntelRdtStats     `json:"intel_rdt_stats,omitempty"`
+}
+
+func NewStats() *Stats {
+	return &Stats{}
+}
diff --git a/libcontainer/intelrdt/util_test.go b/libcontainer/intelrdt/util_test.go
new file mode 100644
index 00000000000..cbc35cf1653
--- /dev/null
+++ b/libcontainer/intelrdt/util_test.go
@@ -0,0 +1,66 @@
+// +build linux
+
+/*
+ * Utility for testing Intel RDT operations.
+ * Creates a mock of the Intel RDT "resource control" filesystem for the duration of the test.
+ */
+package intelrdt
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type intelRdtTestUtil struct {
+	// intelRdt data to use in tests
+	IntelRdtData *intelRdtData
+
+	// Path to the mock Intel RDT "resource control" filesystem directory
+	IntelRdtPath string
+
+	// Temporary directory to store mock Intel RDT "resource control" filesystem
+	tempDir string
+	t       *testing.T
+}
+
+// Creates a new test util
+func NewIntelRdtTestUtil(t *testing.T) *intelRdtTestUtil {
+	d := &intelRdtData{
+		config: &configs.Config{
+			Cgroups: &configs.Cgroup{
+				Resources: &configs.Resources{},
+			},
+		},
+	}
+	tempDir, err := ioutil.TempDir("", "intelrdt_test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	d.root = tempDir
+	testIntelRdtPath := filepath.Join(d.root, "resctrl")
+
+	// Ensure the full mock Intel RDT "resource control" filesystem path exists
+	err = os.MkdirAll(testIntelRdtPath, 0755)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return &intelRdtTestUtil{IntelRdtData: d, IntelRdtPath: testIntelRdtPath, tempDir: tempDir, t: t}
+}
+
+func (c *intelRdtTestUtil) cleanup() {
+	os.RemoveAll(c.tempDir)
+}
+
+// Write the specified contents on the mock of the specified Intel RDT "resource control" files
+func (c *intelRdtTestUtil) writeFileContents(fileContents map[string]string) {
+	for file, contents := range fileContents {
+		err := writeFile(c.IntelRdtPath, file, contents)
+		if err != nil {
+			c.t.Fatal(err)
+		}
+	}
+}
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index 5c430bcab77..c9ef6123a97 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/resourcemanager"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/utils"
@@ -48,6 +49,7 @@ type setnsProcess struct {
 	parentPipe    *os.File
 	childPipe     *os.File
 	cgroupPaths   map[string]string
+	intelRdtPath  string
 	config        *initConfig
 	fds           []string
 	process       *Process
@@ -88,6 +90,15 @@ func (p *setnsProcess) start() (err error) {
 			return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
 		}
 	}
+	if p.intelRdtPath != "" {
+		// if Intel RDT "resource control" filesystem path exists
+		_, err := os.Stat(p.intelRdtPath)
+		if err == nil {
+			if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
+				return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
+			}
+		}
+	}
 	// set oom_score_adj
 	if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
 		return newSystemErrorWithCause(err, "setting oom score")
diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go
index 94afd65c613..323567919af 100644
--- a/libcontainer/specconv/spec_linux.go
+++ b/libcontainer/specconv/spec_linux.go
@@ -449,6 +449,11 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*
 			})
 		}
 	}
+	if r.IntelRdt != nil {
+		if r.IntelRdt.L3CacheSchema != nil {
+			c.Resources.IntelRdtL3CacheSchema = *r.IntelRdt.L3CacheSchema
+		}
+	}
 	return c, nil
 }
 
diff --git a/libcontainer/stats_linux.go b/libcontainer/stats_linux.go
index c629dc67de9..29fd641e9dd 100644
--- a/libcontainer/stats_linux.go
+++ b/libcontainer/stats_linux.go
@@ -1,8 +1,10 @@
 package libcontainer
 
 import "github.com/opencontainers/runc/libcontainer/cgroups"
+import "github.com/opencontainers/runc/libcontainer/intelrdt"
 
 type Stats struct {
-	Interfaces  []*NetworkInterface
-	CgroupStats *cgroups.Stats
+	Interfaces    []*NetworkInterface
+	CgroupStats   *cgroups.Stats
+	IntelRdtStats *intelrdt.Stats
 }
diff --git a/utils_linux.go b/utils_linux.go
index fe617c711c4..9c8420d5371 100644
--- a/utils_linux.go
+++ b/utils_linux.go
@@ -15,6 +15,7 @@ import (
 	"github.com/coreos/go-systemd/activation"
 	"github.com/opencontainers/runc/libcontainer"
 	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
 	"github.com/opencontainers/runc/libcontainer/specconv"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/urfave/cli"
@@ -39,6 +40,10 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
 			return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
 		}
 	}
+	if intelrdt.IsIntelRdtEnabled() {
+		intelRdtManager := libcontainer.IntelRdtFs
+		return libcontainer.New(abs, cgroupManager, intelRdtManager, libcontainer.CriuPath(context.GlobalString("criu")))
+	}
 	return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu")))
 }
 