Skip to content

Commit

Permalink
feat: oci: support cgroups requests in --oci mode
Browse files Browse the repository at this point in the history
Honor the --apply-cgroups flag and the individual cgroups resource flags
when running in OCI mode.

The launcher instructs runc/crun to create a named cgroup with the
LinuxResources specified in the config.json. runc/crun must be called
with the `--systemd-cgroup` flag when systemd is used as the cgroup manager.

Closes sylabs/singularity#1032

Signed-off-by: Edita Kizinevic <edita.kizinevic@cern.ch>
  • Loading branch information
dtrudg authored and edytuk committed May 24, 2023
1 parent 004cac6 commit 1b94c19
Show file tree
Hide file tree
Showing 11 changed files with 285 additions and 77 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ For older changes see the [archived Singularity change log](https://github.com/a
- `--rocm` to bind ROCm GPU libraries and devices into the container.
- `--nv` to bind Nvidia driver / basic CUDA libraries and devices into
the container.
- `--apply-cgroups`, and the `--cpu*`, `--blkio*`, `--memory*`,
`--pids-limit` flags to apply resource limits.

### Other changes

Expand Down
103 changes: 82 additions & 21 deletions e2e/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,74 +255,100 @@ func (c *ctx) instanceStatsRootless(t *testing.T) {
c.instanceStats(t, e2e.UserProfile)
}

func (c *ctx) actionApply(t *testing.T, profile e2e.Profile) {
e2e.EnsureImage(t, c.env)

func (c *ctx) actionApply(t *testing.T, profile e2e.Profile, imageRef string) {
tests := []struct {
name string
args []string
expectErrorCode int
expectErrorOut string
rootfull bool
rootless bool
skipOCI bool
onlyOCI bool
}{
{
name: "nonexistent toml",
args: []string{"--apply-cgroups", "testdata/cgroups/doesnotexist.toml", c.env.ImagePath, "/bin/sleep", "5"},
args: []string{"--apply-cgroups", "testdata/cgroups/doesnotexist.toml", imageRef, "/bin/sleep", "5"},
expectErrorCode: 255,
expectErrorOut: "no such file or directory",
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: false,
},
{
name: "invalid toml",
args: []string{"--apply-cgroups", "testdata/cgroups/invalid.toml", c.env.ImagePath, "/bin/sleep", "5"},
args: []string{"--apply-cgroups", "testdata/cgroups/invalid.toml", imageRef, "/bin/sleep", "5"},
expectErrorCode: 255,
expectErrorOut: "toml: expected character",
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: false,
},
{
name: "memory limit",
args: []string{"--apply-cgroups", "testdata/cgroups/memory_limit.toml", c.env.ImagePath, "/bin/sleep", "5"},
args: []string{"--apply-cgroups", "testdata/cgroups/memory_limit.toml", imageRef, "/bin/sleep", "5"},
expectErrorCode: 137,
rootfull: true,
rootless: true,
skipOCI: true,
onlyOCI: false,
},
{
name: "memory limit oci",
args: []string{"--apply-cgroups", "testdata/cgroups/memory_limit.toml", imageRef, "/bin/sleep", "5"},
// crun returns a 1 when the OOM kill happens.
expectErrorCode: 1,
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: true,
},
{
name: "cpu success",
args: []string{"--apply-cgroups", "testdata/cgroups/cpu_success.toml", c.env.ImagePath, "/bin/true"},
args: []string{"--apply-cgroups", "testdata/cgroups/cpu_success.toml", imageRef, "/bin/true"},
expectErrorCode: 0,
rootfull: true,
// This currently fails in the e2e scenario due to the way we are using a mount namespace.
// It *does* work if you test it, directly calling the apptainer CLI.
// Reason is believed to be: https://github.com/opencontainers/runc/issues/3026
rootless: false,
skipOCI: false,
onlyOCI: false,
},
// Device access is allowed by default.
{
name: "device allow default",
args: []string{"--apply-cgroups", "testdata/cgroups/null.toml", c.env.ImagePath, "cat", "/dev/null"},
args: []string{"--apply-cgroups", "testdata/cgroups/null.toml", imageRef, "cat", "/dev/null"},
expectErrorCode: 0,
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: false,
},
// Device limits are properly applied only in rootful mode. Rootless will ignore them with a warning.
{
name: "device deny",
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", c.env.ImagePath, "cat", "/dev/null"},
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", imageRef, "cat", "/dev/null"},
expectErrorCode: 1,
expectErrorOut: "Operation not permitted",
rootfull: true,
rootless: false,
// runc/crun always allow /dev/null access
skipOCI: true,
onlyOCI: false,
},
{
name: "device ignored",
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", c.env.ImagePath, "cat", "/dev/null"},
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", imageRef, "cat", "/dev/null"},
expectErrorCode: 0,
expectErrorOut: "Device limits will not be applied with rootless cgroups",
rootfull: false,
rootless: true,
// runc/crun silently ignore in rootless
skipOCI: true,
onlyOCI: false,
},
}

Expand All @@ -334,6 +360,13 @@ func (c *ctx) actionApply(t *testing.T, profile e2e.Profile) {
if !profile.Privileged() && !tt.rootless {
t.Skip()
}
if profile.OCI() && tt.skipOCI {
t.Skip()
}
if !profile.OCI() && tt.onlyOCI {
t.Skip()
}

exitFunc := []e2e.ApptainerCmdResultOp{}
if tt.expectErrorOut != "" {
exitFunc = []e2e.ApptainerCmdResultOp{e2e.ExpectError(e2e.ContainMatch, tt.expectErrorOut)}
Expand All @@ -350,13 +383,27 @@ func (c *ctx) actionApply(t *testing.T, profile e2e.Profile) {
}

// actionApplyRoot runs the actionApply test table under the root profiles.
//
// It calls e2e.EnsureImage and e2e.EnsureOCIImage (presumably to make sure
// both test images are available), then runs two subtests, each named after
// its profile's String() value:
//   - e2e.RootProfile, passing c.env.ImagePath as the image reference;
//   - e2e.OCIRootProfile, passing "oci-archive:" + c.env.OCIImagePath.
func (c *ctx) actionApplyRoot(t *testing.T) {
// NOTE(review): this call passes two arguments, while the calls below pass an
// image reference as a third. Presumably it is the pre-change line kept by
// the diff view -- confirm.
c.actionApply(t, e2e.RootProfile)
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
t.Run(e2e.RootProfile.String(), func(t *testing.T) {
c.actionApply(t, e2e.RootProfile, c.env.ImagePath)
})
t.Run(e2e.OCIRootProfile.String(), func(t *testing.T) {
c.actionApply(t, e2e.OCIRootProfile, "oci-archive:"+c.env.OCIImagePath)
})
}

// actionApplyRootless runs the actionApply test table under the unprivileged
// profiles.
//
// After calling e2e.EnsureImage and e2e.EnsureOCIImage it runs one subtest
// per profile, named after the profile's String() value:
//   - e2e.UserProfile, e2e.UserNamespaceProfile and e2e.FakerootProfile use
//     c.env.ImagePath as the image reference;
//   - e2e.OCIUserProfile and e2e.OCIFakerootProfile use
//     "oci-archive:" + c.env.OCIImagePath.
func (c *ctx) actionApplyRootless(t *testing.T) {
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
for _, profile := range []e2e.Profile{e2e.UserProfile, e2e.UserNamespaceProfile, e2e.FakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
// NOTE(review): the two-argument call below sits next to the three-argument
// form. Presumably it is the pre-change line kept by the diff view -- confirm.
c.actionApply(t, profile)
c.actionApply(t, profile, c.env.ImagePath)
})
}
for _, profile := range []e2e.Profile{e2e.OCIUserProfile, e2e.OCIFakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
c.actionApply(t, profile, "oci-archive:"+c.env.OCIImagePath)
})
}
}
Expand Down Expand Up @@ -499,21 +546,21 @@ var resourceFlagTests = []resourceFlagTest{
},
}

// actionFlags runs every entry of resourceFlagTests as a subtest named after
// the entry's name field, under the given profile.
//
// Each subtest dispatches on cgroups.IsCgroup2UnifiedMode(): when it reports
// true the entry is handed to actionFlagV2 and the subtest returns, otherwise
// it is handed to actionFlagV1.
//
// NOTE(review): the signature and the actionFlagV1/actionFlagV2 calls each
// appear twice below, once without and once with imageRef. Presumably these
// are the pre- and post-change lines of a diff view -- confirm which form is
// live before compiling.
func (c *ctx) actionFlags(t *testing.T, profile e2e.Profile) {
func (c *ctx) actionFlags(t *testing.T, profile e2e.Profile, imageRef string) {
e2e.EnsureImage(t, c.env)

for _, tt := range resourceFlagTests {
t.Run(tt.name, func(t *testing.T) {
if cgroups.IsCgroup2UnifiedMode() {
c.actionFlagV2(t, tt, profile)
c.actionFlagV2(t, tt, profile, imageRef)
return
}
c.actionFlagV1(t, tt, profile)
c.actionFlagV1(t, tt, profile, imageRef)
})
}
}

func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profile) {
func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profile, imageRef string) {
// Don't try to test a resource that doesn't exist in our caller cgroup.
// E.g. some systems don't have memory.memswp, and might not have blkio.bfq
require.CgroupsResourceExists(t, tt.controllerV1, tt.resourceV1)
Expand All @@ -530,7 +577,7 @@ func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profil
}

args := tt.args
args = append(args, "-B", "/sys/fs/cgroup", c.env.ImagePath, "/bin/sh", "-c", shellCmd)
args = append(args, "-B", "/sys/fs/cgroup", imageRef, "/bin/sh", "-c", shellCmd)

c.env.RunApptainer(
t,
Expand All @@ -541,7 +588,7 @@ func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profil
)
}

func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profile) {
func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profile, imageRef string) {
if tt.skipV2 {
t.Skip()
}
Expand All @@ -566,7 +613,7 @@ func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profil
shellCmd := fmt.Sprintf("cat /sys/fs/cgroup$(cat /proc/self/cgroup | grep '^0::' | cut -d ':' -f 3)/%s", tt.resourceV2)

args := tt.args
args = append(args, "-B", "/sys/fs/cgroup", c.env.ImagePath, "/bin/sh", "-c", shellCmd)
args = append(args, "-B", "/sys/fs/cgroup", imageRef, "/bin/sh", "-c", shellCmd)

c.env.RunApptainer(
t,
Expand All @@ -578,13 +625,27 @@ func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profil
}

// actionFlagsRoot runs the actionFlags resource-flag tests under the root
// profiles.
//
// It calls e2e.EnsureImage and e2e.EnsureOCIImage (presumably to make sure
// both test images are available), then runs two subtests, each named after
// its profile's String() value:
//   - e2e.RootProfile, passing c.env.ImagePath as the image reference;
//   - e2e.OCIRootProfile, passing "oci-archive:" + c.env.OCIImagePath.
func (c *ctx) actionFlagsRoot(t *testing.T) {
// NOTE(review): this call passes two arguments, while the calls below pass an
// image reference as a third. Presumably it is the pre-change line kept by
// the diff view -- confirm.
c.actionFlags(t, e2e.RootProfile)
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
t.Run(e2e.RootProfile.String(), func(t *testing.T) {
c.actionFlags(t, e2e.RootProfile, c.env.ImagePath)
})
t.Run(e2e.OCIRootProfile.String(), func(t *testing.T) {
c.actionFlags(t, e2e.OCIRootProfile, "oci-archive:"+c.env.OCIImagePath)
})
}

// actionFlagsRootless runs the actionFlags resource-flag tests under the
// unprivileged profiles.
//
// After calling e2e.EnsureImage and e2e.EnsureOCIImage it runs one subtest
// per profile, named after the profile's String() value:
//   - e2e.UserProfile, e2e.UserNamespaceProfile and e2e.FakerootProfile use
//     c.env.ImagePath as the image reference;
//   - e2e.OCIUserProfile and e2e.OCIFakerootProfile use
//     "oci-archive:" + c.env.OCIImagePath.
func (c *ctx) actionFlagsRootless(t *testing.T) {
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
for _, profile := range []e2e.Profile{e2e.UserProfile, e2e.UserNamespaceProfile, e2e.FakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
// NOTE(review): the two-argument call below sits next to the three-argument
// form. Presumably it is the pre-change line kept by the diff view -- confirm.
c.actionFlags(t, profile)
c.actionFlags(t, profile, c.env.ImagePath)
})
}
for _, profile := range []e2e.Profile{e2e.OCIUserProfile, e2e.OCIFakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
c.actionFlags(t, profile, "oci-archive:"+c.env.OCIImagePath)
})
}
}
Expand Down
3 changes: 3 additions & 0 deletions e2e/env/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ func (c ctx) ociApptainerEnv(t *testing.T) {
e2e.WithProfile(e2e.OCIUserProfile),
e2e.WithCommand("exec"),
e2e.WithEnv(tt.env),
e2e.WithRootlessEnv(),
e2e.WithArgs(tt.image, "/bin/sh", "-c", "echo $PATH"),
e2e.ExpectExit(
0,
Expand Down Expand Up @@ -185,6 +186,7 @@ func (c ctx) ociEnvOption(t *testing.T) {
e2e.WithProfile(e2e.OCIUserProfile),
e2e.WithCommand("exec"),
e2e.WithEnv(tt.hostEnv),
e2e.WithRootlessEnv(),
e2e.WithArgs(args...),
e2e.ExpectExit(
0,
Expand Down Expand Up @@ -287,6 +289,7 @@ func (c ctx) ociEnvFile(t *testing.T) {
e2e.WithProfile(e2e.OCIUserProfile),
e2e.WithCommand("exec"),
e2e.WithEnv(tt.hostEnv),
e2e.WithRootlessEnv(),
e2e.WithArgs(args...),
e2e.ExpectExit(
0,
Expand Down
14 changes: 14 additions & 0 deletions e2e/internal/e2e/apptainercmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,20 @@ func (env TestEnv) RunApptainer(t *testing.T, cmdOps ...ApptainerCmdOp) {
cmd.Env = os.Environ()
}

// Clear user-specific DBUS / XDG vars when we are using a priv profile,
// as they don't make sense for the root user... and wouldn't be set in a
// real root user session.
if privileged {
i := 0
for _, e := range cmd.Env {
if !(strings.HasPrefix(e, "DBUS_SESSION_BUS_ADDRESS=") || strings.HasPrefix(e, "XDG_RUNTIME_DIR=")) {
cmd.Env[i] = e
i++
}
}
cmd.Env = cmd.Env[:i]
}

// By default, each E2E command shares a temporary image cache
// directory. If a test is directly testing the cache, or depends on
// specific ordered cache behavior then
Expand Down
14 changes: 14 additions & 0 deletions e2e/internal/e2e/profile.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ type Profile struct {
requirementsFn func(*testing.T) // function checking requirements for the profile
apptainerOption string // option added to apptainer command for the profile
optionForCommands []string // apptainer commands concerned by the option to be added
oci bool // whether the profile uses the OCI low-level runtime
}

// NativeProfiles defines all available profiles for the native apptainer runtime
Expand All @@ -81,6 +82,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: nil,
apptainerOption: "",
optionForCommands: []string{},
oci: false,
},
rootProfile: {
name: "Root",
Expand All @@ -91,6 +93,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: nil,
apptainerOption: "",
optionForCommands: []string{},
oci: false,
},
fakerootProfile: {
name: "Fakeroot",
Expand All @@ -101,6 +104,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: fakerootRequirements,
apptainerOption: "--fakeroot",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start", "build"},
oci: false,
},
userNamespaceProfile: {
name: "UserNamespace",
Expand All @@ -111,6 +115,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: require.UserNamespace,
apptainerOption: "--userns",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: false,
},
rootUserNamespaceProfile: {
name: "RootUserNamespace",
Expand All @@ -121,6 +126,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: require.UserNamespace,
apptainerOption: "--userns",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: false,
},
}

Expand All @@ -135,6 +141,7 @@ var OCIProfiles = map[string]Profile{
requirementsFn: ociRequirements,
apptainerOption: "--oci",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: true,
},
ociRootProfile: {
name: "OCIRoot",
Expand All @@ -145,6 +152,7 @@ var OCIProfiles = map[string]Profile{
requirementsFn: ociRequirements,
apptainerOption: "--oci",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: true,
},
ociFakerootProfile: {
name: "OCIFakeroot",
Expand All @@ -155,6 +163,7 @@ var OCIProfiles = map[string]Profile{
requirementsFn: ociRequirements,
apptainerOption: "--oci --fakeroot",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: true,
},
}

Expand All @@ -176,6 +185,11 @@ func (p Profile) Privileged() bool {
return p.privileged
}

// OCI reports whether the profile uses an OCI runtime rather than the
// apptainer native runtime. It returns the profile's oci field unchanged.
func (p Profile) OCI() bool {
return p.oci
}

// Requirements calls the different require.* functions
// necessary for running an E2E test under this profile.
func (p Profile) Requirements(t *testing.T) {
Expand Down
Loading

0 comments on commit 1b94c19

Please sign in to comment.