Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Enable systemd cgroups management #540

Merged
merged 1 commit into from
Feb 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
so that `oci run` provides expected interactive behavior by default.
- Default hostname for `oci mount` containers is now `singularity` instead of
`mrsdalloway`.
- systemd is now supported and used as the default cgroups manager. Set
`systemd cgroups = no` in `singularity.conf` to manage cgroups directly via
the cgroupfs.

### New features / functionalities

Expand Down
21 changes: 21 additions & 0 deletions e2e/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,27 @@ func (c configTests) configGlobal(t *testing.T) {
directiveValue: "yes",
exit: 0,
},
// FIXME
// The e2e tests currently run inside a PID namespace.
// (see internal/init/init_linux.go)
// We can't instruct systemd to manage our cgroups as the PIDs in our test namespace
// won't match what systemd sees.
// {
// name: "SystemdCgroupsYes",
// argv: []string{"--apply-cgroups", "testdata/cgroups/pids_limit.toml", c.sandboxImage, "true"},
// profile: e2e.RootProfile,
// directive: "systemd cgroups",
// directiveValue: "yes",
// exit: 0,
// },
{
name: "SystemdCgroupNo",
argv: []string{"--apply-cgroups", "testdata/cgroups/pids_limit.toml", c.sandboxImage, "true"},
profile: e2e.RootProfile,
directive: "systemd cgroups",
directiveValue: "no",
exit: 0,
},
}

for _, tt := range tests {
Expand Down
6 changes: 6 additions & 0 deletions e2e/internal/e2e/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ func SetupDefaultConfig(t *testing.T, path string) {
c.MksquashfsPath = buildcfg.MKSQUASHFS_PATH
c.NvidiaContainerCliPath = buildcfg.NVIDIA_CONTAINER_CLI_PATH
c.UnsquashfsPath = buildcfg.UNSQUASHFS_PATH
// FIXME
// The e2e tests currently run inside a PID namespace.
// (see internal/init/init_linux.go)
// We can't instruct systemd to manage our cgroups as the PIDs in our test namespace
// won't match what systemd sees.
c.SystemdCgroups = false

Privileged(func(t *testing.T) {
f, err := os.Create(path)
Expand Down
2 changes: 2 additions & 0 deletions e2e/testdata/cgroups/pids_limit.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pids]
limit = 1024
95 changes: 81 additions & 14 deletions internal/pkg/cgroups/manager_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ var ErrUnitialized = errors.New("cgroups manager is not initialized")
type Manager struct {
	// group is the name of the cgroup. For cgroupfs management this is a
	// path such as "/singularity/<pid>"; for systemd management it is a
	// "slice:scopePrefix:name" identifier such as
	// "system.slice:singularity:<pid>".
	group string
	// systemd is true when cgroup operations are delegated to systemd,
	// false when the cgroupfs is manipulated directly.
	systemd bool
	// cgroup is the underlying runc/libcontainer/cgroups manager.
	cgroup lccgroups.Manager
}
Expand Down Expand Up @@ -58,14 +60,48 @@ func (m *Manager) GetCgroupRootPath() (rootPath string, err error) {

// Take the piece before the first occurrence of "devices" as the root.
// I.E. /sys/fs/cgroup/devices/singularity/196219 -> /sys/fs/cgroup
pathParts := strings.Split(devicePath, "devices")
pathParts := strings.SplitN(devicePath, "devices", 2)
if len(pathParts) != 2 {
return "", fmt.Errorf("could not find devices controller path")
}

return filepath.Clean(pathParts[0]), nil
}

// GetCgroupRelPath returns the path of the cgroup relative to the cgroupfs
// mount point, e.g. /sys/fs/cgroup/devices/singularity/196219 -> /singularity/196219.
// It returns ErrUnitialized if the manager has no group or underlying cgroup.
func (m *Manager) GetCgroupRelPath() (relPath string, err error) {
	if m.group == "" || m.cgroup == nil {
		return "", ErrUnitialized
	}

	// v2 - has a single fixed mountpoint for the root cgroup, so the
	// relative path is the absolute path with that mountpoint stripped.
	if lccgroups.IsCgroup2UnifiedMode() {
		absPath := m.cgroup.Path("")
		return strings.TrimPrefix(absPath, unifiedMountPoint), nil
	}

	// v1 - Get absolute paths to cgroup by subsystem
	subPaths := m.cgroup.GetPaths()
	// For cgroups v1 we are relying on fetching the 'devices' subsystem path.
	// The devices subsystem is needed for our OCI engine and its presence is
	// enforced in runc/libcontainer/cgroups/fs initialization without 'skipDevices'.
	// This means we never explicitly put a container into a cgroup without a
	// set 'devices' path.
	devicePath, ok := subPaths["devices"]
	if !ok {
		return "", fmt.Errorf("could not find devices controller path")
	}

	// Take the piece after the first occurrence of "devices" as the relative path.
	// I.E. /sys/fs/cgroup/devices/singularity/196219 -> /singularity/196219
	pathParts := strings.SplitN(devicePath, "devices", 2)
	if len(pathParts) != 2 {
		// Distinct from the missing-key case above: the path exists but is
		// not under a 'devices' hierarchy component.
		return "", fmt.Errorf("devices controller path %q does not contain 'devices'", devicePath)
	}

	return filepath.Clean(pathParts[1]), nil
}

// UpdateFromSpec updates the existing managed cgroup using configuration from
// an OCI LinuxResources spec struct.
func (m *Manager) UpdateFromSpec(resources *specs.LinuxResources) (err error) {
Expand Down Expand Up @@ -118,7 +154,28 @@ func (m *Manager) AddProc(pid int) (err error) {
if pid == 0 {
return fmt.Errorf("cannot add a zero pid to cgroup")
}
return m.cgroup.Apply(pid)

// If we are managing cgroupfs directly we are good to go.
procMgr := m.cgroup
// However, the systemd manager won't put another process in the cgroup...
// so we use an underlying cgroupfs manager for this particular operation.
if m.systemd {
relPath, err := m.GetCgroupRelPath()
if err != nil {
return err
}
lcConfig := &lcconfigs.Cgroup{
Path: relPath,
Resources: &lcconfigs.Resources{},
Systemd: false,
}
procMgr, err = lcmanager.New(lcConfig)
if err != nil {
return fmt.Errorf("while creating cgroupfs manager: %w", err)
}
}

return procMgr.Apply(pid)
}

// Freeze freezes processes in the managed cgroup.
Expand Down Expand Up @@ -147,7 +204,7 @@ func (m *Manager) Destroy() (err error) {

// newManager creates a new Manager, with the associated resources and cgroup.
// The Manager is ready to manage the cgroup but does not apply limits etc.
func newManager(resources *specs.LinuxResources, group string) (manager *Manager, err error) {
func newManager(resources *specs.LinuxResources, group string, systemd bool) (manager *Manager, err error) {
if resources == nil {
return nil, fmt.Errorf("non-nil cgroup LinuxResources definition is required")
}
Expand All @@ -164,7 +221,7 @@ func newManager(resources *specs.LinuxResources, group string) (manager *Manager

opts := &lcspecconv.CreateOpts{
CgroupName: group,
UseSystemdCgroup: false,
UseSystemdCgroup: systemd,
RootlessCgroups: false,
Spec: spec,
}
Expand All @@ -180,30 +237,34 @@ func newManager(resources *specs.LinuxResources, group string) (manager *Manager
}

mgr := Manager{
group: group,
cgroup: cgroup,
group: group,
systemd: systemd,
cgroup: cgroup,
}
return &mgr, nil
}

// NewManagerWithSpec creates a Manager, applies the configuration in spec, and adds pid to the cgroup.
// If a group name is supplied, it will be used by the manager.
// If group = "" then "/singularity/<pid>" is used as a default for cgroupfs
// management, or "system.slice:singularity:<pid>" when systemd is true.
func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string) (manager *Manager, err error) {
func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string, systemd bool) (manager *Manager, err error) {
if pid == 0 {
return nil, fmt.Errorf("a pid is required to create a new cgroup")
}
if group == "" {
if group == "" && !systemd {
group = filepath.Join("/singularity", strconv.Itoa(pid))
}
if group == "" && systemd {
group = "system.slice:singularity:" + strconv.Itoa(pid)
}

// Create the manager
mgr, err := newManager(spec, group)
mgr, err := newManager(spec, group, systemd)
if err != nil {
return nil, err
}
// Apply the cgroup to pid (add pid to cgroup)
if err := mgr.AddProc(pid); err != nil {
if err := mgr.cgroup.Apply(pid); err != nil {
return nil, err
}
if err := mgr.UpdateFromSpec(spec); err != nil {
Expand All @@ -216,15 +277,17 @@ func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string) (mana
// NewManagerWithFile creates a Manager, applies the configuration at specPath, and adds pid to the cgroup.
// If a group name is supplied, it will be used by the manager.
// If group = "" then "/singularity/<pid>" is used as a default.
func NewManagerWithFile(specPath string, pid int, group string) (manager *Manager, err error) {
func NewManagerWithFile(specPath string, pid int, group string, systemd bool) (manager *Manager, err error) {
spec, err := LoadResources(specPath)
if err != nil {
return nil, fmt.Errorf("while loading cgroups spec: %w", err)
}
return NewManagerWithSpec(&spec, pid, group)
return NewManagerWithSpec(&spec, pid, group, systemd)
}

// GetManagerForGroup returns a Manager for the provided cgroup name/path.
// It can only return a cgroupfs manager, as we aren't wiring back up to systemd
// through dbus etc.
func GetManagerForGroup(group string) (manager *Manager, err error) {
if group == "" {
return nil, fmt.Errorf("cannot load cgroup - no name/path specified")
Expand All @@ -236,20 +299,24 @@ func GetManagerForGroup(group string) (manager *Manager, err error) {
lcConfig := &lcconfigs.Cgroup{
Path: group,
Resources: &lcconfigs.Resources{},
Systemd: false,
}
cgroup, err := lcmanager.New(lcConfig)
if err != nil {
return nil, fmt.Errorf("while creating cgroup manager: %w", err)
}

mgr := Manager{
group: group,
cgroup: cgroup,
group: group,
systemd: false,
cgroup: cgroup,
}
return &mgr, nil
}

// GetManagerForPid returns a Manager for the cgroup that pid is a member of.
// It can only return a cgroupfs manager, as we aren't wiring back up to systemd
// through dbus etc.
func GetManagerForPid(pid int) (manager *Manager, err error) {
path, err := pidToPath(pid)
if err != nil {
Expand Down
61 changes: 55 additions & 6 deletions internal/pkg/cgroups/manager_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,66 @@ import (

// This file contains tests that will run under cgroups v1 & v2, and test utility functions.

func TestGetFromPid(t *testing.T) {
type (
	// CgroupTestFunc is a test body that is run twice: once with a
	// cgroupfs-managed cgroup (systemd=false) and once with a
	// systemd-managed cgroup (systemd=true).
	CgroupTestFunc func(t *testing.T, systemd bool)
	// CgroupTest couples a subtest name with the CgroupTestFunc to run.
	CgroupTest struct {
		// name is the subtest name passed to t.Run.
		name string
		// testFunc is the test body to execute.
		testFunc CgroupTestFunc
	}
)

// CgroupTests is a list of named cgroup tests.
type CgroupTests []CgroupTest

// TestCgroups runs every test in the suite under both the cgroupfs and
// systemd cgroup managers.
func TestCgroups(t *testing.T) {
	suite := CgroupTests{
		{name: "GetFromPid", testFunc: testGetFromPid},
	}
	runCgroupfsTests(t, suite)
	runSystemdTests(t, suite)
}

// runCgroupTests runs each entry of tests as a subtest beneath a parent
// subtest named name, passing the systemd flag through to the test function.
// It is the shared implementation behind runCgroupfsTests / runSystemdTests,
// which previously duplicated this loop.
func runCgroupTests(t *testing.T, name string, systemd bool, tests CgroupTests) {
	t.Run(name, func(t *testing.T) {
		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				tt.testFunc(t, systemd)
			})
		}
	})
}

// runCgroupfsTests runs tests with cgroups managed directly via the cgroupfs.
func runCgroupfsTests(t *testing.T, tests CgroupTests) {
	runCgroupTests(t, "cgroupfs", false, tests)
}

// runSystemdTests runs tests with cgroups managed via systemd.
func runSystemdTests(t *testing.T, tests CgroupTests) {
	runCgroupTests(t, "systemd", true, tests)
}

func testGetFromPid(t *testing.T, systemd bool) {
test.EnsurePrivilege(t)
require.Cgroups(t)

pid, manager, cleanup := testManager(t)
// We create either a cgroupfs or systemd cgroup initially
pid, manager, cleanup := testManager(t, systemd)
defer cleanup()

// Covers GetManagerForPath indirectly
// We can only retrieve a cgroupfs managed cgroup from pid
pidMgr, err := GetManagerForPid(pid)
if err != nil {
t.Fatalf("While getting cgroup manager for pid: %v", err)
}

if pidMgr.group != manager.group {
relPath, err := manager.GetCgroupRelPath()
if err != nil {
t.Fatalf("While getting manager cgroup relative path")
}

if pidMgr.group != relPath {
t.Errorf("Expected %s for cgroup from pid, got %s", manager.group, pidMgr.cgroup)
}
}
Expand Down Expand Up @@ -117,7 +163,7 @@ func ensureState(t *testing.T, pid int, wantStates string) {

// testManager returns a cgroup manager, that has created a cgroup with a `cat /dev/zero` process,
// and example resource config.
func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) {
func testManager(t *testing.T, systemd bool) (pid int, manager *Manager, cleanup func()) {
// Create process to put into a cgroup
t.Log("Creating test process")
cmd := exec.Command("/bin/cat", "/dev/zero")
Expand All @@ -127,6 +173,9 @@ func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) {
pid = cmd.Process.Pid
strPid := strconv.Itoa(pid)
group := filepath.Join("/singularity", strPid)
if systemd {
group = "system.slice:singularity:" + strPid
}

cgroupsToml := "example/cgroups.toml"
// Some systems, e.g. ppc64le may not have a 2MB page size, so don't
Expand All @@ -137,7 +186,7 @@ func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) {
cgroupsToml = "example/cgroups-no-hugetlb.toml"
}

manager, err = NewManagerWithFile(cgroupsToml, pid, group)
manager, err = NewManagerWithFile(cgroupsToml, pid, group, systemd)
if err != nil {
t.Fatalf("While creating new cgroup: %v", err)
}
Expand Down
Loading