diff --git a/e2e/config/config.go b/e2e/config/config.go index 528d8be6cc..07e5d59c2d 100644 --- a/e2e/config/config.go +++ b/e2e/config/config.go @@ -499,6 +499,27 @@ func (c configTests) configGlobal(t *testing.T) { directiveValue: "yes", exit: 0, }, + // FIXME + // The e2e tests currently run inside a PID namespace. + // (see internal/init/init_linux.go) + // We can't instruct systemd to manage our cgroups as the PIDs in our test namespace + // won't match what systemd sees. + // { + // name: "SystemdCgroupsYes", + // argv: []string{"--apply-cgroups", "testdata/cgroups/pids_limit.toml", c.sandboxImage, "true"}, + // profile: e2e.RootProfile, + // directive: "systemd cgroup manager", + // directiveValue: "yes", + // exit: 0, + // }, + { + name: "SystemdCgroupNo", + argv: []string{"--apply-cgroups", "testdata/cgroups/pids_limit.toml", c.sandboxImage, "true"}, + profile: e2e.RootProfile, + directive: "systemd cgroup manager", + directiveValue: "no", + exit: 0, + }, } for _, tt := range tests { diff --git a/e2e/internal/e2e/config.go b/e2e/internal/e2e/config.go index 681cd29644..0e79966d92 100644 --- a/e2e/internal/e2e/config.go +++ b/e2e/internal/e2e/config.go @@ -28,6 +28,12 @@ func SetupDefaultConfig(t *testing.T, path string) { c.MksquashfsPath = buildcfg.MKSQUASHFS_PATH c.NvidiaContainerCliPath = buildcfg.NVIDIA_CONTAINER_CLI_PATH c.UnsquashfsPath = buildcfg.UNSQUASHFS_PATH + // FIXME + // The e2e tests currently run inside a PID namespace. + // (see internal/init/init_linux.go) + // We can't instruct systemd to manage our cgroups as the PIDs in our test namespace + // won't match what systemd sees. 
+ c.SystemdCgroups = false Privileged(func(t *testing.T) { f, err := os.Create(path) diff --git a/e2e/testdata/cgroups/pids_limit.toml b/e2e/testdata/cgroups/pids_limit.toml new file mode 100644 index 0000000000..c7c6e60a50 --- /dev/null +++ b/e2e/testdata/cgroups/pids_limit.toml @@ -0,0 +1,2 @@ +[pids] + limit = 1024 diff --git a/internal/pkg/cgroups/manager_linux.go b/internal/pkg/cgroups/manager_linux.go index 3f0a68677e..ce934843bc 100644 --- a/internal/pkg/cgroups/manager_linux.go +++ b/internal/pkg/cgroups/manager_linux.go @@ -29,6 +29,8 @@ var ErrUnitialized = errors.New("cgroups manager is not initialized") type Manager struct { // The name of the cgroup group string + // Are we using systemd? + systemd bool // The underlying runc/libcontainer/cgroups manager cgroup lccgroups.Manager } @@ -58,7 +60,7 @@ func (m *Manager) GetCgroupRootPath() (rootPath string, err error) { // Take the piece before the first occurrence of "devices" as the root. // I.E. /sys/fs/cgroup/devices/singularity/196219 -> /sys/fs/cgroup - pathParts := strings.Split(devicePath, "devices") + pathParts := strings.SplitN(devicePath, "devices", 2) if len(pathParts) != 2 { return "", fmt.Errorf("could not find devices controller path") } @@ -66,6 +68,40 @@ func (m *Manager) GetCgroupRootPath() (rootPath string, err error) { return filepath.Clean(pathParts[0]), nil } +// GetCgroupRelPath returns the relative path of the cgroup under the mount point +func (m *Manager) GetCgroupRelPath() (relPath string, err error) { + if m.group == "" || m.cgroup == nil { + return "", ErrUnitialized + } + + // v2 - has a single fixed mountpoint for the root cgroup + if lccgroups.IsCgroup2UnifiedMode() { + absPath := m.cgroup.Path("") + return strings.TrimPrefix(absPath, unifiedMountPoint), nil + } + + // v1 - Get absolute paths to cgroup by subsystem + subPaths := m.cgroup.GetPaths() + // For cgroups v1 we are relying on fetching the 'devices' subsystem path. 
+ // The devices subsystem is needed for our OCI engine and its presence is + // enforced in runc/libcontainer/cgroups/fs initialization without 'skipDevices'. + // This means we never explicitly put a container into a cgroup without a + // set 'devices' path. + devicePath, ok := subPaths["devices"] + if !ok { + return "", fmt.Errorf("could not find devices controller path") + } + + // Take the piece after the first occurrence of "devices" as the relative path. + // I.E. /sys/fs/cgroup/devices/singularity/196219 -> /singularity/196219 + pathParts := strings.SplitN(devicePath, "devices", 2) + if len(pathParts) != 2 { + return "", fmt.Errorf("could not find devices controller path") + } + + return filepath.Clean(pathParts[1]), nil +} + // UpdateFromSpec updates the existing managed cgroup using configuration from // an OCI LinuxResources spec struct. func (m *Manager) UpdateFromSpec(resources *specs.LinuxResources) (err error) { @@ -118,7 +154,28 @@ func (m *Manager) AddProc(pid int) (err error) { if pid == 0 { return fmt.Errorf("cannot add a zero pid to cgroup") } - return m.cgroup.Apply(pid) + + // If we are managing cgroupfs directly we are good to go. + procMgr := m.cgroup + // However, the systemd manager won't put another process in the cgroup... + // so we use an underlying cgroupfs manager for this particular operation. + if m.systemd { + relPath, err := m.GetCgroupRelPath() + if err != nil { + return err + } + lcConfig := &lcconfigs.Cgroup{ + Path: relPath, + Resources: &lcconfigs.Resources{}, + Systemd: false, + } + procMgr, err = lcmanager.New(lcConfig) + if err != nil { + return fmt.Errorf("while creating cgroupfs manager: %w", err) + } + } + + return procMgr.Apply(pid) } // Freeze freezes processes in the managed cgroup. @@ -147,7 +204,7 @@ func (m *Manager) Destroy() (err error) { // newManager creates a new Manager, with the associated resources and cgroup. // The Manager is ready to manage the cgroup but does not apply limits etc. 
-func newManager(resources *specs.LinuxResources, group string) (manager *Manager, err error) { +func newManager(resources *specs.LinuxResources, group string, systemd bool) (manager *Manager, err error) { if resources == nil { return nil, fmt.Errorf("non-nil cgroup LinuxResources definition is required") } @@ -164,7 +221,7 @@ func newManager(resources *specs.LinuxResources, group string) (manager *Manager opts := &lcspecconv.CreateOpts{ CgroupName: group, - UseSystemdCgroup: false, + UseSystemdCgroup: systemd, RootlessCgroups: false, Spec: spec, } @@ -180,8 +237,9 @@ func newManager(resources *specs.LinuxResources, group string) (manager *Manager } mgr := Manager{ - group: group, - cgroup: cgroup, + group: group, + systemd: systemd, + cgroup: cgroup, } return &mgr, nil } @@ -189,21 +247,24 @@ func newManager(resources *specs.LinuxResources, group string) (manager *Manager // NewManagerWithSpec creates a Manager, applies the configuration in spec, and adds pid to the cgroup. // If a group name is supplied, it will be used by the manager. // If group = "" then "/singularity/" is used as a default. 
-func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string) (manager *Manager, err error) { +func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string, systemd bool) (manager *Manager, err error) { if pid == 0 { return nil, fmt.Errorf("a pid is required to create a new cgroup") } - if group == "" { + if group == "" && !systemd { group = filepath.Join("/singularity", strconv.Itoa(pid)) } + if group == "" && systemd { + group = "system.slice:singularity:" + strconv.Itoa(pid) + } // Create the manager - mgr, err := newManager(spec, group) + mgr, err := newManager(spec, group, systemd) if err != nil { return nil, err } // Apply the cgroup to pid (add pid to cgroup) - if err := mgr.AddProc(pid); err != nil { + if err := mgr.cgroup.Apply(pid); err != nil { return nil, err } if err := mgr.UpdateFromSpec(spec); err != nil { @@ -216,15 +277,17 @@ func NewManagerWithSpec(spec *specs.LinuxResources, pid int, group string) (mana // NewManagerWithFile creates a Manager, applies the configuration at specPath, and adds pid to the cgroup. // If a group name is supplied, it will be used by the manager. // If group = "" then "/singularity/" is used as a default. -func NewManagerWithFile(specPath string, pid int, group string) (manager *Manager, err error) { +func NewManagerWithFile(specPath string, pid int, group string, systemd bool) (manager *Manager, err error) { spec, err := LoadResources(specPath) if err != nil { return nil, fmt.Errorf("while loading cgroups spec: %w", err) } - return NewManagerWithSpec(&spec, pid, group) + return NewManagerWithSpec(&spec, pid, group, systemd) } // GetManager returns a Manager for the provided cgroup name/path. +// It can only return a cgroupfs manager, as we aren't wiring back up to systemd +// through dbus etc. 
func GetManagerForGroup(group string) (manager *Manager, err error) { if group == "" { return nil, fmt.Errorf("cannot load cgroup - no name/path specified") @@ -236,6 +299,7 @@ func GetManagerForGroup(group string) (manager *Manager, err error) { lcConfig := &lcconfigs.Cgroup{ Path: group, Resources: &lcconfigs.Resources{}, + Systemd: false, } cgroup, err := lcmanager.New(lcConfig) if err != nil { @@ -243,13 +307,16 @@ func GetManagerForGroup(group string) (manager *Manager, err error) { } mgr := Manager{ - group: group, - cgroup: cgroup, + group: group, + systemd: false, + cgroup: cgroup, } return &mgr, nil } // GetManagerFromPid returns a Manager for the cgroup that pid is a member of. +// It can only return a cgroupfs manager, as we aren't wiring back up to systemd +// through dbus etc. func GetManagerForPid(pid int) (manager *Manager, err error) { path, err := pidToPath(pid) if err != nil { diff --git a/internal/pkg/cgroups/manager_linux_test.go b/internal/pkg/cgroups/manager_linux_test.go index 6dc6f0bd95..3134434c34 100644 --- a/internal/pkg/cgroups/manager_linux_test.go +++ b/internal/pkg/cgroups/manager_linux_test.go @@ -21,20 +21,66 @@ import ( // This file contains tests that will run under cgroups v1 & v2, and test utility functions. 
-func TestGetFromPid(t *testing.T) { +type ( + CgroupTestFunc func(t *testing.T, systemd bool) + CgroupTest struct { + name string + testFunc CgroupTestFunc + } +) +type CgroupTests []CgroupTest + +func TestCgroups(t *testing.T) { + tests := CgroupTests{ + { + name: "GetFromPid", + testFunc: testGetFromPid, + }, + } + runCgroupfsTests(t, tests) + runSystemdTests(t, tests) +} + +func runCgroupfsTests(t *testing.T, tests CgroupTests) { + t.Run("cgroupfs", func(t *testing.T) { + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.testFunc(t, false) + }) + } + }) +} + +func runSystemdTests(t *testing.T, tests CgroupTests) { + t.Run("systemd", func(t *testing.T) { + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.testFunc(t, true) + }) + } + }) +} + +func testGetFromPid(t *testing.T, systemd bool) { test.EnsurePrivilege(t) require.Cgroups(t) - pid, manager, cleanup := testManager(t) + // We create either a cgroupfs or systemd cgroup initially + pid, manager, cleanup := testManager(t, systemd) defer cleanup() - // Covers GetManagerForPath indirectly + // We can only retrieve a cgroupfs managed cgroup from pid pidMgr, err := GetManagerForPid(pid) if err != nil { t.Fatalf("While getting cgroup manager for pid: %v", err) } - if pidMgr.group != manager.group { + relPath, err := manager.GetCgroupRelPath() + if err != nil { + t.Fatalf("While getting manager cgroup relative path") + } + + if pidMgr.group != relPath { t.Errorf("Expected %s for cgroup from pid, got %s", manager.group, pidMgr.cgroup) } } @@ -117,7 +163,7 @@ func ensureState(t *testing.T, pid int, wantStates string) { // testManager returns a cgroup manager, that has created a cgroup with a `cat /dev/zero` process, // and example resource config. 
-func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) { +func testManager(t *testing.T, systemd bool) (pid int, manager *Manager, cleanup func()) { // Create process to put into a cgroup t.Log("Creating test process") cmd := exec.Command("/bin/cat", "/dev/zero") @@ -127,6 +173,9 @@ func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) { pid = cmd.Process.Pid strPid := strconv.Itoa(pid) group := filepath.Join("/singularity", strPid) + if systemd { + group = "system.slice:singularity:" + strPid + } cgroupsToml := "example/cgroups.toml" // Some systems, e.g. ppc64le may not have a 2MB page size, so don't @@ -137,7 +186,7 @@ func testManager(t *testing.T) (pid int, manager *Manager, cleanup func()) { cgroupsToml = "example/cgroups-no-hugetlb.toml" } - manager, err = NewManagerWithFile(cgroupsToml, pid, group) + manager, err = NewManagerWithFile(cgroupsToml, pid, group, systemd) if err != nil { t.Fatalf("While creating new cgroup: %v", err) } diff --git a/internal/pkg/cgroups/manager_linux_v1_test.go b/internal/pkg/cgroups/manager_linux_v1_test.go index bdf745c355..0495888cc0 100644 --- a/internal/pkg/cgroups/manager_linux_v1_test.go +++ b/internal/pkg/cgroups/manager_linux_v1_test.go @@ -9,6 +9,7 @@ import ( "io/ioutil" "os" "os/exec" + "path" "path/filepath" "testing" @@ -22,17 +23,45 @@ import ( func TestCgroupsV1(t *testing.T) { test.EnsurePrivilege(t) require.CgroupsV1(t) - t.Run("GetCgroupRootPath", testGetCgroupRootPathV1) - t.Run("NewUpdate", testNewUpdateV1) - t.Run("AddProc", testAddProcV1) - t.Run("FreezeThaw", testFreezeThawV1) + tests := CgroupTests{ + { + name: "GetCGroupRootPath", + testFunc: testGetCgroupRootPathV1, + }, + { + name: "GetCGroupRelPath", + testFunc: testGetCgroupRelPathV1, + }, + { + name: "NewUpdate", + testFunc: testNewUpdateV1, + }, + { + name: "UpdateUnified", + testFunc: testUpdateUnifiedV1, + }, + { + name: "AddProc", + testFunc: testAddProcV1, + }, + { + name: "FreezeThaw", + testFunc: 
testFreezeThawV1, + }, + } + runCgroupfsTests(t, tests) + runSystemdTests(t, tests) } //nolint:dupl -func testGetCgroupRootPathV1(t *testing.T) { +func testGetCgroupRootPathV1(t *testing.T, systemd bool) { // This cgroup won't be created in the fs as we don't add a PID through the manager - group := filepath.Join("/singularity", "rootpathtest") - manager, err := newManager(&specs.LinuxResources{}, group) + group := filepath.Join("/singularity/rootpathtest") + if systemd { + group = "system.slice:singularity:rootpathtest" + } + + manager, err := newManager(&specs.LinuxResources{}, group, systemd) if err != nil { t.Fatalf("While creating manager: %v", err) } @@ -41,23 +70,54 @@ func testGetCgroupRootPathV1(t *testing.T) { if err != nil { t.Errorf("While getting root path: %v", err) } - // Cgroups v2 has a fixed mount point - if rootPath != unifiedMountPoint { + + // With v1 the mount could be somewhere odd... but we can test indirectly. + // The root path + '/devices' + the rel path should give us the absolute path + // for the cgroup with the devices controller. + // The rel path is tested explicitly, so we know it works. 
+ relPath, err := manager.GetCgroupRelPath() + if err != nil { + t.Errorf("While getting rel path: %v", err) + } + + absDevicePath := path.Join(rootPath, "devices", relPath) + if absDevicePath != manager.cgroup.Path("devices") { t.Errorf("Expected %s, got %s", unifiedMountPoint, rootPath) } } //nolint:dupl -func testNewUpdateV1(t *testing.T) { - _, manager, cleanup := testManager(t) - defer cleanup() +func testGetCgroupRelPathV1(t *testing.T, systemd bool) { + // This cgroup won't be created in the fs as we don't add a PID through the manager + group := filepath.Join("/singularity/rootpathtest") + wantPath := group + if systemd { + group = "system.slice:singularity:rootpathtest" + wantPath = "/system.slice/singularity-rootpathtest.scope" + } - // Check for correct 1024 value - rootPath, err := manager.GetCgroupRootPath() + manager, err := newManager(&specs.LinuxResources{}, group, systemd) + if err != nil { + t.Fatalf("While creating manager: %v", err) + } + + relPath, err := manager.GetCgroupRelPath() if err != nil { - t.Fatalf("can't determine cgroups root path, is cgroups enabled ?") + t.Errorf("While getting root path: %v", err) } - pidsMax := filepath.Join(rootPath, "pids", manager.group, "pids.limit") + + if relPath != wantPath { + t.Errorf("Expected %s, got %s", wantPath, relPath) + } +} + +//nolint:dupl +func testNewUpdateV1(t *testing.T, systemd bool) { + _, manager, cleanup := testManager(t, systemd) + defer cleanup() + + // Check for correct 1024 value + pidsMax := filepath.Join(manager.cgroup.Path("pids"), "pids.max") ensureInt(t, pidsMax, 1024) // Write a new config with [pids] limit = 512 @@ -83,8 +143,19 @@ func testNewUpdateV1(t *testing.T) { ensureInt(t, pidsMax, 512) } -func testAddProcV1(t *testing.T) { - pid, manager, cleanup := testManager(t) +//nolint:dupl +func testUpdateUnifiedV1(t *testing.T, systemd bool) { + _, manager, cleanup := testManager(t, systemd) + defer cleanup() + + // Try to update existing cgroup from unified style config 
setting [Unified] pids.max directly + if err := manager.UpdateFromFile("example/cgroups-unified.toml"); err == nil { + t.Fatalf("Unexpected success applying unified config on cgroups v1") + } +} + +func testAddProcV1(t *testing.T, systemd bool) { + pid, manager, cleanup := testManager(t, systemd) cmd := exec.Command("/bin/cat", "/dev/zero") if err := cmd.Start(); err != nil { @@ -101,16 +172,12 @@ func testAddProcV1(t *testing.T) { t.Errorf("While adding proc to cgroup: %v", err) } - rootPath, err := manager.GetCgroupRootPath() - if err != nil { - t.Fatalf("can't determine cgroups root path, is cgroups enabled ?") - } - cgroupProcs := filepath.Join(rootPath, "pids", manager.group, "cgroup.procs") + cgroupProcs := filepath.Join(manager.cgroup.Path("pids"), "cgroup.procs") ensureContainsInt(t, cgroupProcs, int64(pid)) ensureContainsInt(t, cgroupProcs, int64(newPid)) } -func testFreezeThawV1(t *testing.T) { +func testFreezeThawV1(t *testing.T, systemd bool) { manager := &Manager{} if err := manager.Freeze(); err == nil { t.Errorf("unexpected success with PID 0") @@ -119,7 +186,7 @@ func testFreezeThawV1(t *testing.T) { t.Errorf("unexpected success with PID 0") } - pid, manager, cleanup := testManager(t) + pid, manager, cleanup := testManager(t, systemd) defer cleanup() manager.Freeze() diff --git a/internal/pkg/cgroups/manager_linux_v2_test.go b/internal/pkg/cgroups/manager_linux_v2_test.go index 0b5fa0543b..200f3a8d61 100644 --- a/internal/pkg/cgroups/manager_linux_v2_test.go +++ b/internal/pkg/cgroups/manager_linux_v2_test.go @@ -23,21 +23,45 @@ import ( func TestCgroupsV2(t *testing.T) { test.EnsurePrivilege(t) require.CgroupsV2Unified(t) - t.Run("GetCgroupRootPath", testGetCgroupRootPathV2) - t.Run("NewUpdate", testNewUpdateV2) - t.Run("UpdateUnified", testUpdateUnifiedV2) - t.Run("AddProc", testAddProcV2) - t.Run("FreezeThaw", testFreezeThawV2) + tests := CgroupTests{ + { + name: "GetCGroupRootPath", + testFunc: testGetCgroupRootPathV2, + }, + { + name: 
"GetCGroupRelPath", + testFunc: testGetCgroupRelPathV2, + }, + { + name: "NewUpdate", + testFunc: testNewUpdateV2, + }, + { + name: "UpdateUnified", + testFunc: testUpdateUnifiedV2, + }, + { + name: "AddProc", + testFunc: testAddProcV2, + }, + { + name: "FreezeThaw", + testFunc: testFreezeThawV2, + }, + } + runCgroupfsTests(t, tests) + runSystemdTests(t, tests) } //nolint:dupl -func testGetCgroupRootPathV2(t *testing.T) { - test.EnsurePrivilege(t) - require.CgroupsV2Unified(t) - +func testGetCgroupRootPathV2(t *testing.T, systemd bool) { // This cgroup won't be created in the fs as we don't add a PID through the manager - group := filepath.Join("/singularity", "rootpathtest") - manager, err := newManager(&specs.LinuxResources{}, group) + group := filepath.Join("/singularity/rootpathtest") + if systemd { + group = "system.slice:singularity:rootpathtest" + } + + manager, err := newManager(&specs.LinuxResources{}, group, systemd) if err != nil { t.Fatalf("While creating manager: %v", err) } @@ -52,17 +76,38 @@ func testGetCgroupRootPathV2(t *testing.T) { } } -//nolint:dupl -func testNewUpdateV2(t *testing.T) { - test.EnsurePrivilege(t) - require.CgroupsV2Unified(t) +func testGetCgroupRelPathV2(t *testing.T, systemd bool) { + // This cgroup won't be created in the fs as we don't add a PID through the manager + group := filepath.Join("/singularity/rootpathtest") + wantPath := group + if systemd { + group = "system.slice:singularity:rootpathtest" + wantPath = "/system.slice/singularity-rootpathtest.scope" + } + + manager, err := newManager(&specs.LinuxResources{}, group, systemd) + if err != nil { + t.Fatalf("While creating manager: %v", err) + } + + relPath, err := manager.GetCgroupRelPath() + if err != nil { + t.Errorf("While getting root path: %v", err) + } + + if relPath != wantPath { + t.Errorf("Expected %s, got %s", wantPath, relPath) + } +} - _, manager, cleanup := testManager(t) +//nolint:dupl +func testNewUpdateV2(t *testing.T, systemd bool) { + _, manager, 
cleanup := testManager(t, systemd) defer cleanup() // For cgroups v2 [pids] limit -> pids.max // Check for correct 1024 value - pidsMax := filepath.Join("/sys/fs/cgroup", manager.group, "pids.max") + pidsMax := filepath.Join(manager.cgroup.Path(""), "pids.max") ensureInt(t, pidsMax, 1024) // Write a new config with [pids] limit = 512 @@ -89,14 +134,11 @@ func testNewUpdateV2(t *testing.T) { } //nolint:dupl -func testUpdateUnifiedV2(t *testing.T) { - test.EnsurePrivilege(t) - require.CgroupsV2Unified(t) - +func testUpdateUnifiedV2(t *testing.T, systemd bool) { // Apply a 1024 pids.max limit using the v1 style config that sets [pids] limit - _, manager, cleanup := testManager(t) + _, manager, cleanup := testManager(t, systemd) defer cleanup() - pidsMax := filepath.Join("/sys/fs/cgroup", manager.group, "pids.max") + pidsMax := filepath.Join(manager.cgroup.Path(""), "pids.max") ensureInt(t, pidsMax, 1024) // Update existing cgroup from unified style config setting [Unified] pids.max directly @@ -109,11 +151,8 @@ func testUpdateUnifiedV2(t *testing.T) { } //nolint:dupl -func testAddProcV2(t *testing.T) { - test.EnsurePrivilege(t) - require.CgroupsV2Unified(t) - - pid, manager, cleanup := testManager(t) +func testAddProcV2(t *testing.T, systemd bool) { + pid, manager, cleanup := testManager(t, systemd) cmd := exec.Command("/bin/cat", "/dev/zero") if err := cmd.Start(); err != nil { @@ -130,16 +169,13 @@ func testAddProcV2(t *testing.T) { t.Errorf("While adding proc to cgroup: %v", err) } - cgroupProcs := filepath.Join("/sys/fs/cgroup", manager.group, "cgroup.procs") + cgroupProcs := filepath.Join(manager.cgroup.Path(""), "cgroup.procs") ensureContainsInt(t, cgroupProcs, int64(pid)) ensureContainsInt(t, cgroupProcs, int64(newPid)) } //nolint:dupl -func testFreezeThawV2(t *testing.T) { - test.EnsurePrivilege(t) - require.CgroupsV2Unified(t) - +func testFreezeThawV2(t *testing.T, systemd bool) { manager := &Manager{} if err := manager.Freeze(); err == nil { 
t.Errorf("unexpected success freezing PID 0") @@ -148,7 +184,7 @@ func testFreezeThawV2(t *testing.T) { t.Errorf("unexpected success thawing PID 0") } - pid, manager, cleanup := testManager(t) + pid, manager, cleanup := testManager(t, systemd) defer cleanup() manager.Freeze() @@ -156,7 +192,7 @@ func testFreezeThawV2(t *testing.T) { // for our cat /dev/zero while it's running, so check freeze marker as well // as the process state here. ensureState(t, pid, "S") - freezePath := path.Join("/sys/fs/cgroup", manager.group, "cgroup.freeze") + freezePath := path.Join(manager.cgroup.Path(""), "cgroup.freeze") ensureInt(t, freezePath, 1) manager.Thaw() diff --git a/internal/pkg/cgroups/util.go b/internal/pkg/cgroups/util.go index 6a0c054b49..4dcb7b7430 100644 --- a/internal/pkg/cgroups/util.go +++ b/internal/pkg/cgroups/util.go @@ -26,6 +26,8 @@ func pidToPath(pid int) (path string, err error) { return "", fmt.Errorf("cannot read %s: %w", pidCGFile, err) } + fmt.Print(paths) + // cgroups v2 path is always given by the unified "" subsystem ok := false if lccgroups.IsCgroup2UnifiedMode() { diff --git a/internal/pkg/runtime/engine/oci/config_linux.go b/internal/pkg/runtime/engine/oci/config_linux.go index ffba7086f3..41603c3786 100644 --- a/internal/pkg/runtime/engine/oci/config_linux.go +++ b/internal/pkg/runtime/engine/oci/config_linux.go @@ -18,20 +18,21 @@ const Name = "oci" // EngineConfig is the config for the OCI engine. 
type EngineConfig struct { - BundlePath string `json:"bundlePath"` - LogPath string `json:"logPath"` - LogFormat string `json:"logFormat"` - PidFile string `json:"pidFile"` - OciConfig *oci.Config `json:"ociConfig"` - MasterPts int `json:"masterPts"` - SlavePts int `json:"slavePts"` - OutputStreams [2]int `json:"outputStreams"` - ErrorStreams [2]int `json:"errorStreams"` - InputStreams [2]int `json:"inputStreams"` - SyncSocket string `json:"syncSocket"` - EmptyProcess bool `json:"emptyProcess"` - Exec bool `json:"exec"` - Cgroups *cgroups.Manager `json:"-"` + BundlePath string `json:"bundlePath"` + LogPath string `json:"logPath"` + LogFormat string `json:"logFormat"` + PidFile string `json:"pidFile"` + OciConfig *oci.Config `json:"ociConfig"` + MasterPts int `json:"masterPts"` + SlavePts int `json:"slavePts"` + OutputStreams [2]int `json:"outputStreams"` + ErrorStreams [2]int `json:"errorStreams"` + InputStreams [2]int `json:"inputStreams"` + SyncSocket string `json:"syncSocket"` + EmptyProcess bool `json:"emptyProcess"` + Exec bool `json:"exec"` + SystemdCgroups bool `json:"systemdCgroups"` + Cgroups *cgroups.Manager `json:"-"` sync.Mutex `json:"-"` State ociruntime.State `json:"state"` @@ -95,3 +96,13 @@ func (e *EngineConfig) SetPidFile(path string) { func (e *EngineConfig) GetPidFile() string { return e.PidFile } + +// SetSystemdCgroups sets whether to manage cgroups with systemd. +func (e *EngineConfig) SetSystemdCgroups(systemd bool) { + e.SystemdCgroups = systemd +} + +// GetSystemdCgroups gets whether to manage cgroups with systemd. 
+func (e *EngineConfig) GetSystemdCgroups() bool { + return e.SystemdCgroups +} diff --git a/internal/pkg/runtime/engine/oci/create_linux.go b/internal/pkg/runtime/engine/oci/create_linux.go index e0aeeb5d3e..2ebd9e0b41 100644 --- a/internal/pkg/runtime/engine/oci/create_linux.go +++ b/internal/pkg/runtime/engine/oci/create_linux.go @@ -482,9 +482,11 @@ func (e *EngineOperations) waitStatusUpdate() { func (c *container) addCgroups(pid int, system *mount.System) error { name := c.engine.CommonConfig.ContainerID + resources := c.engine.EngineConfig.OciConfig.Linux.Resources + systemd := c.engine.EngineConfig.GetSystemdCgroups() cgroupsPath := c.engine.EngineConfig.OciConfig.Linux.CgroupsPath - if !filepath.IsAbs(cgroupsPath) { + if !systemd && !filepath.IsAbs(cgroupsPath) { if cgroupsPath == "" { cgroupsPath = filepath.Join("/singularity-oci", name) } else { @@ -492,9 +494,13 @@ func (c *container) addCgroups(pid int, system *mount.System) error { } } + if systemd && cgroupsPath == "" { + cgroupsPath = "system.slice:singularity-oci:" + name + } + c.engine.EngineConfig.OciConfig.Linux.CgroupsPath = cgroupsPath - manager, err := cgroups.NewManagerWithSpec(c.engine.EngineConfig.OciConfig.Linux.Resources, pid, cgroupsPath) + manager, err := cgroups.NewManagerWithSpec(resources, pid, cgroupsPath, systemd) if err != nil { return fmt.Errorf("failed to apply cgroups resources restriction: %s", err) } diff --git a/internal/pkg/runtime/engine/oci/prepare_linux.go b/internal/pkg/runtime/engine/oci/prepare_linux.go index 3d2bd3bb8e..16ac999e3f 100644 --- a/internal/pkg/runtime/engine/oci/prepare_linux.go +++ b/internal/pkg/runtime/engine/oci/prepare_linux.go @@ -11,11 +11,14 @@ import ( "github.com/kr/pty" specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sylabs/singularity/internal/pkg/buildcfg" "github.com/sylabs/singularity/internal/pkg/cgroups" "github.com/sylabs/singularity/internal/pkg/runtime/engine/config/starter" + 
"github.com/sylabs/singularity/internal/pkg/util/fs" "github.com/sylabs/singularity/pkg/ociruntime" "github.com/sylabs/singularity/pkg/sylog" "github.com/sylabs/singularity/pkg/util/capabilities" + "github.com/sylabs/singularity/pkg/util/singularityconf" ) // make master/slave as global variable to avoid GC close file descriptor @@ -54,6 +57,16 @@ func (e *EngineOperations) PrepareConfig(starterConfig *starter.Config) error { return fmt.Errorf("empty OCI linux configuration") } + // TODO - investigate whether this is the highest place to pull this value from singularity.conf + if !fs.IsOwner(buildcfg.SINGULARITY_CONF_FILE, 0) { + return fmt.Errorf("%s must be owned by root", buildcfg.SINGULARITY_CONF_FILE) + } + sConf, err := singularityconf.Parse(buildcfg.SINGULARITY_CONF_FILE) + if err != nil { + return fmt.Errorf("unable to parse singularity.conf file: %s", err) + } + e.EngineConfig.SystemdCgroups = sConf.SystemdCgroups + // reset state config that could be passed to engine e.EngineConfig.State = ociruntime.State{} diff --git a/internal/pkg/runtime/engine/singularity/container_linux.go b/internal/pkg/runtime/engine/singularity/container_linux.go index cce773e042..bec2760225 100644 --- a/internal/pkg/runtime/engine/singularity/container_linux.go +++ b/internal/pkg/runtime/engine/singularity/container_linux.go @@ -299,7 +299,7 @@ func create(ctx context.Context, engine *EngineOperations, rpcOps *client.RPC, p if os.Geteuid() == 0 && !c.userNS { path := engine.EngineConfig.GetCgroupsPath() if path != "" { - cgroupsManager, err = cgroups.NewManagerWithFile(path, pid, "") + cgroupsManager, err = cgroups.NewManagerWithFile(path, pid, "", engine.EngineConfig.File.SystemdCgroups) if err != nil { return fmt.Errorf("while applying cgroups config: %v", err) } diff --git a/internal/pkg/test/tool/require/require.go b/internal/pkg/test/tool/require/require.go index f35477fdcc..cf7dec9b83 100644 --- a/internal/pkg/test/tool/require/require.go +++ 
b/internal/pkg/test/tool/require/require.go @@ -136,7 +136,7 @@ func Cgroups(t *testing.T) { // current test is skipped with a message. func CgroupsV1(t *testing.T) { Cgroups(t) - if cgroups.IsCgroup2UnifiedMode() || cgroups.IsCgroup2HybridMode() { + if cgroups.IsCgroup2UnifiedMode() { t.Skipf("cgroups v1 legacy mode not available") } } diff --git a/pkg/util/singularityconf/config.go b/pkg/util/singularityconf/config.go index 3bb1de3cb8..7b91cd1b13 100644 --- a/pkg/util/singularityconf/config.go +++ b/pkg/util/singularityconf/config.go @@ -73,6 +73,7 @@ type File struct { DownloadConcurrency uint `default:"3" directive:"download concurrency"` DownloadPartSize uint `default:"5242880" directive:"download part size"` DownloadBufferSize uint `default:"32768" directive:"download buffer size"` + SystemdCgroups bool `default:"yes" authorized:"yes,no" directive:"systemd cgroups"` } const TemplateAsset = `# SINGULARITY.CONF @@ -463,4 +464,10 @@ download part size = {{ .DownloadPartSize }} # This option specifies the transfer buffer size when concurrent downloads # are enabled. download buffer size = {{ .DownloadBufferSize }} + +# SYSTEMD CGROUPS: [BOOL] +# DEFAULT: yes +# Whether to use systemd to manage container cgroups. Required for rootless cgroups +# functionality. 'no' will manage cgroups directly via cgroupfs. +systemd cgroups = {{ if eq .SystemdCgroups true }}yes{{ else }}no{{ end }} `