feat: Enable systemd cgroups management
dtrudg committed Feb 7, 2022
1 parent 1db4d6b commit 553b660
Showing 14 changed files with 385 additions and 98 deletions.
21 changes: 21 additions & 0 deletions e2e/config/config.go
@@ -499,6 +499,27 @@ func (c configTests) configGlobal(t *testing.T) {
directiveValue: "yes",
exit: 0,
},
// FIXME
// The e2e tests currently run inside a PID namespace.
// (see internal/init/init_linux.go)
// We can't instruct systemd to manage our cgroups as the PIDs in our test namespace
// won't match what systemd sees.
// {
// name: "SystemdCgroupsYes",
// argv: []string{"--apply-cgroups", "testdata/cgroups/pids_limit.toml", c.sandboxImage, "true"},
// profile: e2e.RootProfile,
// directive: "systemd cgroup manager",
// directiveValue: "yes",
// exit: 0,
// },
{
name: "SystemdCgroupNo",
argv: []string{"--apply-cgroups", "testdata/cgroups/pids_limit.toml", c.sandboxImage, "true"},
profile: e2e.RootProfile,
directive: "systemd cgroup manager",
directiveValue: "no",
exit: 0,
},
}

for _, tt := range tests {
6 changes: 6 additions & 0 deletions e2e/internal/e2e/config.go
@@ -28,6 +28,12 @@ func SetupDefaultConfig(t *testing.T, path string) {
c.MksquashfsPath = buildcfg.MKSQUASHFS_PATH
c.NvidiaContainerCliPath = buildcfg.NVIDIA_CONTAINER_CLI_PATH
c.UnsquashfsPath = buildcfg.UNSQUASHFS_PATH
// FIXME
// The e2e tests currently run inside a PID namespace.
// (see internal/init/init_linux.go)
// We can't instruct systemd to manage our cgroups as the PIDs in our test namespace
// won't match what systemd sees.
c.SystemdCgroups = false

Privileged(func(t *testing.T) {
f, err := os.Create(path)
2 changes: 2 additions & 0 deletions e2e/testdata/cgroups/pids_limit.toml
@@ -0,0 +1,2 @@
[pids]
limit = 1024
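
This limit corresponds to the `pids` controller of an OCI `LinuxResources` struct. As a rough illustration (using the opencontainers runtime-spec types this package is built around; this is an assumed equivalent, not code from the commit), the loaded TOML maps to:

```go
package main

import (
	"fmt"

	specs "github.com/opencontainers/runtime-spec/specs-go"
)

func main() {
	// Rough equivalent of e2e/testdata/cgroups/pids_limit.toml:
	//   [pids]
	//   limit = 1024
	resources := specs.LinuxResources{
		Pids: &specs.LinuxPids{Limit: 1024},
	}
	fmt.Printf("pids limit: %d\n", resources.Pids.Limit)
}
```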
95 changes: 81 additions & 14 deletions internal/pkg/cgroups/manager_linux.go
@@ -29,6 +29,8 @@ var ErrUnitialized = errors.New("cgroups manager is not initialized")
type Manager struct {
// The name of the cgroup
group string
// Are we using systemd?
systemd bool
// The underlying runc/libcontainer/cgroups manager
cgroup lccgroups.Manager
}
@@ -58,14 +60,48 @@ func (m *Manager) GetCgroupRootPath() (rootPath string, err error) {

// Take the piece before the first occurrence of "devices" as the root.
// I.E. /sys/fs/cgroup/devices/singularity/196219 -> /sys/fs/cgroup
pathParts := strings.Split(devicePath, "devices")
pathParts := strings.SplitN(devicePath, "devices", 2)
if len(pathParts) != 2 {
return "", fmt.Errorf("could not find devices controller path")
}

return filepath.Clean(pathParts[0]), nil
}

// GetCgroupRelPath returns the relative path of the cgroup under the mount point
func (m *Manager) GetCgroupRelPath() (relPath string, err error) {
if m.group == "" || m.cgroup == nil {
return "", ErrUnitialized
}

// v2 - has a single fixed mountpoint for the root cgroup
if lccgroups.IsCgroup2UnifiedMode() {
absPath := m.cgroup.Path("")
return strings.TrimPrefix(absPath, unifiedMountPoint), nil
}

// v1 - Get absolute paths to cgroup by subsystem
subPaths := m.cgroup.GetPaths()
// For cgroups v1 we are relying on fetching the 'devices' subsystem path.
// The devices subsystem is needed for our OCI engine and its presence is
// enforced in runc/libcontainer/cgroups/fs initialization without 'skipDevices'.
// This means we never explicitly put a container into a cgroup without a
// set 'devices' path.
devicePath, ok := subPaths["devices"]
if !ok {
return "", fmt.Errorf("could not find devices controller path")
}

// Take the piece after the first occurrence of "devices" as the relative path.
// I.E. /sys/fs/cgroup/devices/singularity/196219 -> /singularity/196219
pathParts := strings.SplitN(devicePath, "devices", 2)
if len(pathParts) != 2 {
return "", fmt.Errorf("could not find devices controller path")
}

return filepath.Clean(pathParts[1]), nil
}
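
To make the devices-path split concrete, here is a small standalone sketch of the logic used by GetCgroupRootPath and GetCgroupRelPath, with the example path taken from the code comments (illustrative values only):

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	// Example cgroup v1 path for the devices subsystem, as in the comments above.
	devicePath := "/sys/fs/cgroup/devices/singularity/196219"

	// Split once on "devices": the piece before it is the cgroup mount root,
	// the piece after it is the path relative to the mount point.
	pathParts := strings.SplitN(devicePath, "devices", 2)
	if len(pathParts) != 2 {
		panic("could not find devices controller path")
	}

	fmt.Println(filepath.Clean(pathParts[0])) // /sys/fs/cgroup
	fmt.Println(filepath.Clean(pathParts[1])) // /singularity/196219
}
```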

// UpdateFromSpec updates the existing managed cgroup using configuration from
// an OCI LinuxResources spec struct.
func (m *Manager) UpdateFromSpec(resources *specs.LinuxResources) (err error) {
@@ -118,7 +154,28 @@ func (m *Manager) AddProc(pid int) (err error) {
if pid == 0 {
return fmt.Errorf("cannot add a zero pid to cgroup")
}
return m.cgroup.Apply(pid)

// If we are managing cgroupfs directly we are good to go.
procMgr := m.cgroup
// However, the systemd manager won't put another process in the cgroup...
// so we use an underlying cgroupfs manager for this particular operation.
if m.systemd {
relPath, err := m.GetCgroupRelPath()
if err != nil {
return err
}
lcConfig := &lcconfigs.Cgroup{
Path: relPath,
Resources: &lcconfigs.Resources{},
Systemd: false,
}
procMgr, err = lcmanager.New(lcConfig)
if err != nil {
return fmt.Errorf("while creating cgroupfs manager: %w", err)
}
}

return procMgr.Apply(pid)
}
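
A minimal usage sketch of this fallback, written as if it lived alongside this package (the internal package cannot be imported from outside the module) and with a hypothetical helper name:

```go
// addChildToCgroup is a hypothetical helper: it creates a systemd-managed
// cgroup for parentPid from a TOML resources file, then adds childPid to it.
func addChildToCgroup(tomlPath string, parentPid, childPid int) error {
	// With systemd=true and group="", the default group name becomes
	// "system.slice:singularity:<parentPid>".
	mgr, err := NewManagerWithFile(tomlPath, parentPid, "", true)
	if err != nil {
		return err
	}
	// AddProc drops to a plain cgroupfs manager internally for this call,
	// because the systemd manager only places the initial PID into the unit.
	return mgr.AddProc(childPid)
}
```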

// Freeze freezes processes in the managed cgroup.
@@ -147,7 +204,7 @@ func (m *Manager) Destroy() (err error) {

// newManager creates a new Manager, with the associated resources and cgroup.
// The Manager is ready to manage the cgroup but does not apply limits etc.
func newManager(resources *specs.LinuxResources, group string) (manager *Manager, err error) {
func newManager(resources *specs.LinuxResources, group string, systemd bool) (manager *Manager, err error) {
if resources == nil {
return nil, fmt.Errorf("non-nil cgroup LinuxResources definition is required")
}
@@ -164,7 +221,7 @@ func newManager(resources *specs.LinuxResources, group string) (manager *Manager

opts := &lcspecconv.CreateOpts{
CgroupName: group,
UseSystemdCgroup: false,
UseSystemdCgroup: systemd,
RootlessCgroups: false,
Spec: spec,
}
@@ -180,30 +237,34 @@ func newManager(resources *specs.LinuxResources, group string) (manager *Manager
}

mgr := Manager{
group: group,
cgroup: cgroup,
group: group,
systemd: systemd,
cgroup: cgroup,
}
return &mgr, nil
}

// NewManagerWithSpec creates a Manager, applies the configuration in spec, and adds pid to the cgroup.
// If a group name is supplied, it will be used by the manager.
// If group = "" then "/singularity/<pid>" is used as a default.
func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string) (manager *Manager, err error) {
func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string, systemd bool) (manager *Manager, err error) {
if pid == 0 {
return nil, fmt.Errorf("a pid is required to create a new cgroup")
}
if group == "" {
if group == "" && !systemd {
group = filepath.Join("/singularity", strconv.Itoa(pid))
}
if group == "" && systemd {
group = "system.slice:singularity:" + strconv.Itoa(pid)
}

// Create the manager
mgr, err := newManager(spec, group)
mgr, err := newManager(spec, group, systemd)
if err != nil {
return nil, err
}
// Apply the cgroup to pid (add pid to cgroup)
if err := mgr.AddProc(pid); err != nil {
if err := mgr.cgroup.Apply(pid); err != nil {
return nil, err
}
if err := mgr.UpdateFromSpec(spec); err != nil {
@@ -216,15 +277,17 @@ func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string) (mana
// NewManagerWithFile creates a Manager, applies the configuration at specPath, and adds pid to the cgroup.
// If a group name is supplied, it will be used by the manager.
// If group = "" then "/singularity/<pid>" is used as a default.
func NewManagerWithFile(specPath string, pid int, group string) (manager *Manager, err error) {
func NewManagerWithFile(specPath string, pid int, group string, systemd bool) (manager *Manager, err error) {
spec, err := LoadResources(specPath)
if err != nil {
return nil, fmt.Errorf("while loading cgroups spec: %w", err)
}
return NewManagerWithSpec(&spec, pid, group)
return NewManagerWithSpec(&spec, pid, group, systemd)
}
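
The systemd default group name follows runc's "slice:prefix:name" convention which, as far as runc's systemd cgroup manager is concerned (an assumption about runc behaviour, not something shown in this diff), maps to a transient scope unit named "prefix-name.scope" under the given slice. A small sketch of that mapping:

```go
package main

import (
	"fmt"
	"strings"
)

// expectedScopeUnit derives the slice and scope unit name that runc's systemd
// cgroup manager is assumed to create for a "slice:prefix:name" group string.
// Illustrative only; the authoritative logic lives in runc/libcontainer.
func expectedScopeUnit(group string) (slice, unit string, err error) {
	parts := strings.Split(group, ":")
	if len(parts) != 3 {
		return "", "", fmt.Errorf("expected slice:prefix:name, got %q", group)
	}
	return parts[0], parts[1] + "-" + parts[2] + ".scope", nil
}

func main() {
	slice, unit, err := expectedScopeUnit("system.slice:singularity:196219")
	if err != nil {
		panic(err)
	}
	fmt.Println(slice, unit) // system.slice singularity-196219.scope
}
```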

// GetManagerForGroup returns a Manager for the provided cgroup name/path.
// It can only return a cgroupfs manager, as we aren't wiring back up to systemd
// through dbus etc.
func GetManagerForGroup(group string) (manager *Manager, err error) {
if group == "" {
return nil, fmt.Errorf("cannot load cgroup - no name/path specified")
@@ -236,20 +299,24 @@ func GetManagerForGroup(group string) (manager *Manager, err error) {
lcConfig := &lcconfigs.Cgroup{
Path: group,
Resources: &lcconfigs.Resources{},
Systemd: false,
}
cgroup, err := lcmanager.New(lcConfig)
if err != nil {
return nil, fmt.Errorf("while creating cgroup manager: %w", err)
}

mgr := Manager{
group: group,
cgroup: cgroup,
group: group,
systemd: false,
cgroup: cgroup,
}
return &mgr, nil
}

// GetManagerForPid returns a Manager for the cgroup that pid is a member of.
// It can only return a cgroupfs manager, as we aren't wiring back up to systemd
// through dbus etc.
func GetManagerForPid(pid int) (manager *Manager, err error) {
path, err := pidToPath(pid)
if err != nil {
61 changes: 55 additions & 6 deletions internal/pkg/cgroups/manager_linux_test.go
@@ -21,20 +21,66 @@ import (

// This file contains tests that will run under cgroups v1 & v2, and test utility functions.

func TestGetFromPid(t *testing.T) {
type (
CgroupTestFunc func(t *testing.T, systemd bool)
CgroupTest struct {
name string
testFunc CgroupTestFunc
}
)
type CgroupTests []CgroupTest

func TestCgroups(t *testing.T) {
tests := CgroupTests{
{
name: "GetFromPid",
testFunc: testGetFromPid,
},
}
runCgroupfsTests(t, tests)
runSystemdTests(t, tests)
}

func runCgroupfsTests(t *testing.T, tests CgroupTests) {
t.Run("cgroupfs", func(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tt.testFunc(t, false)
})
}
})
}

func runSystemdTests(t *testing.T, tests CgroupTests) {
t.Run("systemd", func(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tt.testFunc(t, true)
})
}
})
}

func testGetFromPid(t *testing.T, systemd bool) {
test.EnsurePrivilege(t)
require.Cgroups(t)

pid, manager, cleanup := testManager(t)
// We create either a cgroupfs or systemd cgroup initially
pid, manager, cleanup := testManager(t, systemd)
defer cleanup()

// Covers GetManagerForPath indirectly
// We can only retrieve a cgroupfs managed cgroup from pid
pidMgr, err := GetManagerForPid(pid)
if err != nil {
t.Fatalf("While getting cgroup manager for pid: %v", err)
}

if pidMgr.group != manager.group {
relPath, err := manager.GetCgroupRelPath()
if err != nil {
t.Fatalf("While getting manager cgroup relative path")
}

if pidMgr.group != relPath {
t.Errorf("Expected %s for cgroup from pid, got %s", manager.group, pidMgr.cgroup)
}
}
@@ -117,7 +163,7 @@ func ensureState(t *testing.T, pid int, wantStates string) {

// testManager returns a cgroup manager that has created a cgroup containing a `cat /dev/zero` process,
// using an example resource config.
func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) {
func testManager(t *testing.T, systemd bool) (pid int, manager *Manager, cleanup func()) {
// Create process to put into a cgroup
t.Log("Creating test process")
cmd := exec.Command("/bin/cat", "/dev/zero")
@@ -127,6 +173,9 @@ func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) {
pid = cmd.Process.Pid
strPid := strconv.Itoa(pid)
group := filepath.Join("/singularity", strPid)
if systemd {
group = "system.slice:singularity:" + strPid
}

cgroupsToml := "example/cgroups.toml"
// Some systems, e.g. ppc64le may not have a 2MB page size, so don't
@@ -137,7 +186,7 @@ func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) {
cgroupsToml = "example/cgroups-no-hugetlb.toml"
}

manager, err = NewManagerWithFile(cgroupsToml, pid, group)
manager, err = NewManagerWithFile(cgroupsToml, pid, group, systemd)
if err != nil {
t.Fatalf("While creating new cgroup: %v", err)
}