Skip to content

Commit

Permalink
feat: oci: support cgroups requests in --oci mode
Browse files Browse the repository at this point in the history
Honor the --apply-cgroups flag and the individual cgroups resource flags
when running in OCI mode.

The launcher instructs runc/crun to create a named cgroup with
specified LinuxResources in the config.json. runc/crun must be called
with the `--systemd-cgroup` flag when using systemd as cgroup manager.

Closes sylabs#1032
  • Loading branch information
dtrudg committed Dec 13, 2022
1 parent eefa946 commit d0c2fa0
Show file tree
Hide file tree
Showing 11 changed files with 284 additions and 77 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
- Container environment variables via `--env`, `--env-file`, and
`SINGULARITYENV_` host env vars.
- `--rocm` to bind ROCm GPU libraries and devices into the container.
- `--apply-cgroups` to apply resource limits.
- Instance name is available inside an instance via the new
`SINGULARITY_INSTANCE` environment variable.
- The `sign` command now supports signing with non-PGP key material by
Expand Down
103 changes: 82 additions & 21 deletions e2e/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,74 +246,100 @@ func (c *ctx) instanceStatsRootless(t *testing.T) {
c.instanceStats(t, e2e.UserProfile)
}

func (c *ctx) actionApply(t *testing.T, profile e2e.Profile) {
e2e.EnsureImage(t, c.env)

func (c *ctx) actionApply(t *testing.T, profile e2e.Profile, imageRef string) {
tests := []struct {
name string
args []string
expectErrorCode int
expectErrorOut string
rootfull bool
rootless bool
skipOCI bool
onlyOCI bool
}{
{
name: "nonexistent toml",
args: []string{"--apply-cgroups", "testdata/cgroups/doesnotexist.toml", c.env.ImagePath, "/bin/sleep", "5"},
args: []string{"--apply-cgroups", "testdata/cgroups/doesnotexist.toml", imageRef, "/bin/sleep", "5"},
expectErrorCode: 255,
expectErrorOut: "no such file or directory",
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: false,
},
{
name: "invalid toml",
args: []string{"--apply-cgroups", "testdata/cgroups/invalid.toml", c.env.ImagePath, "/bin/sleep", "5"},
args: []string{"--apply-cgroups", "testdata/cgroups/invalid.toml", imageRef, "/bin/sleep", "5"},
expectErrorCode: 255,
expectErrorOut: "parsing error",
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: false,
},
{
name: "memory limit",
args: []string{"--apply-cgroups", "testdata/cgroups/memory_limit.toml", c.env.ImagePath, "/bin/sleep", "5"},
args: []string{"--apply-cgroups", "testdata/cgroups/memory_limit.toml", imageRef, "/bin/sleep", "5"},
expectErrorCode: 137,
rootfull: true,
rootless: true,
skipOCI: true,
onlyOCI: false,
},
{
name: "memory limit oci",
args: []string{"--apply-cgroups", "testdata/cgroups/memory_limit.toml", imageRef, "/bin/sleep", "5"},
// crun returns a 1 when the OOM kill happens.
expectErrorCode: 1,
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: true,
},
{
name: "cpu success",
args: []string{"--apply-cgroups", "testdata/cgroups/cpu_success.toml", c.env.ImagePath, "/bin/true"},
args: []string{"--apply-cgroups", "testdata/cgroups/cpu_success.toml", imageRef, "/bin/true"},
expectErrorCode: 0,
rootfull: true,
// This currently fails in the e2e scenario due to the way we are using a mount namespace.
// It *does* work if you test it, directly calling the singularity CLI.
// Reason is believed to be: https://github.com/opencontainers/runc/issues/3026
rootless: false,
skipOCI: false,
onlyOCI: false,
},
// Device access is allowed by default.
{
name: "device allow default",
args: []string{"--apply-cgroups", "testdata/cgroups/null.toml", c.env.ImagePath, "cat", "/dev/null"},
args: []string{"--apply-cgroups", "testdata/cgroups/null.toml", imageRef, "cat", "/dev/null"},
expectErrorCode: 0,
rootfull: true,
rootless: true,
skipOCI: false,
onlyOCI: false,
},
// Device limits are properly applied only in rootful mode. Rootless will ignore them with a warning.
{
name: "device deny",
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", c.env.ImagePath, "cat", "/dev/null"},
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", imageRef, "cat", "/dev/null"},
expectErrorCode: 1,
expectErrorOut: "Operation not permitted",
rootfull: true,
rootless: false,
// runc/crun always allow /dev/null access
skipOCI: true,
onlyOCI: false,
},
{
name: "device ignored",
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", c.env.ImagePath, "cat", "/dev/null"},
args: []string{"--apply-cgroups", "testdata/cgroups/deny_device.toml", imageRef, "cat", "/dev/null"},
expectErrorCode: 0,
expectErrorOut: "Device limits will not be applied with rootless cgroups",
rootfull: false,
rootless: true,
// runc/crun silently ignore in rootless
skipOCI: true,
onlyOCI: false,
},
}

Expand All @@ -325,6 +351,13 @@ func (c *ctx) actionApply(t *testing.T, profile e2e.Profile) {
if !profile.Privileged() && !tt.rootless {
t.Skip()
}
if profile.OCI() && tt.skipOCI {
t.Skip()
}
if !profile.OCI() && tt.onlyOCI {
t.Skip()
}

exitFunc := []e2e.SingularityCmdResultOp{}
if tt.expectErrorOut != "" {
exitFunc = []e2e.SingularityCmdResultOp{e2e.ExpectError(e2e.ContainMatch, tt.expectErrorOut)}
Expand All @@ -341,13 +374,27 @@ func (c *ctx) actionApply(t *testing.T, profile e2e.Profile) {
}

func (c *ctx) actionApplyRoot(t *testing.T) {
c.actionApply(t, e2e.RootProfile)
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
t.Run(e2e.RootProfile.String(), func(t *testing.T) {
c.actionApply(t, e2e.RootProfile, c.env.ImagePath)
})
t.Run(e2e.OCIRootProfile.String(), func(t *testing.T) {
c.actionApply(t, e2e.OCIRootProfile, "oci-archive:"+c.env.OCIImagePath)
})
}

func (c *ctx) actionApplyRootless(t *testing.T) {
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
for _, profile := range []e2e.Profile{e2e.UserProfile, e2e.UserNamespaceProfile, e2e.FakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
c.actionApply(t, profile)
c.actionApply(t, profile, c.env.ImagePath)
})
}
for _, profile := range []e2e.Profile{e2e.OCIUserProfile, e2e.OCIFakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
c.actionApply(t, profile, "oci-archive:"+c.env.OCIImagePath)
})
}
}
Expand Down Expand Up @@ -487,21 +534,21 @@ var resourceFlagTests = []resourceFlagTest{
},
}

func (c *ctx) actionFlags(t *testing.T, profile e2e.Profile) {
func (c *ctx) actionFlags(t *testing.T, profile e2e.Profile, imageRef string) {
e2e.EnsureImage(t, c.env)

for _, tt := range resourceFlagTests {
t.Run(tt.name, func(t *testing.T) {
if cgroups.IsCgroup2UnifiedMode() {
c.actionFlagV2(t, tt, profile)
c.actionFlagV2(t, tt, profile, imageRef)
return
}
c.actionFlagV1(t, tt, profile)
c.actionFlagV1(t, tt, profile, imageRef)
})
}
}

func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profile) {
func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profile, imageRef string) {
// Don't try to test a resource that doesn't exist in our caller cgroup.
// E.g. some systems don't have memory.memswp, and might not have blkio.bfq
require.CgroupsResourceExists(t, tt.controllerV1, tt.resourceV1)
Expand All @@ -518,7 +565,7 @@ func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profil
}

args := tt.args
args = append(args, "-B", "/sys/fs/cgroup", c.env.ImagePath, "/bin/sh", "-c", shellCmd)
args = append(args, "-B", "/sys/fs/cgroup", imageRef, "/bin/sh", "-c", shellCmd)

c.env.RunSingularity(
t,
Expand All @@ -529,7 +576,7 @@ func (c *ctx) actionFlagV1(t *testing.T, tt resourceFlagTest, profile e2e.Profil
)
}

func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profile) {
func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profile, imageRef string) {
if tt.skipV2 {
t.Skip()
}
Expand All @@ -554,7 +601,7 @@ func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profil
shellCmd := fmt.Sprintf("cat /sys/fs/cgroup$(cat /proc/self/cgroup | grep '^0::' | cut -d ':' -f 3)/%s", tt.resourceV2)

args := tt.args
args = append(args, "-B", "/sys/fs/cgroup", c.env.ImagePath, "/bin/sh", "-c", shellCmd)
args = append(args, "-B", "/sys/fs/cgroup", imageRef, "/bin/sh", "-c", shellCmd)

c.env.RunSingularity(
t,
Expand All @@ -566,13 +613,27 @@ func (c *ctx) actionFlagV2(t *testing.T, tt resourceFlagTest, profile e2e.Profil
}

func (c *ctx) actionFlagsRoot(t *testing.T) {
c.actionFlags(t, e2e.RootProfile)
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
t.Run(e2e.RootProfile.String(), func(t *testing.T) {
c.actionFlags(t, e2e.RootProfile, c.env.ImagePath)
})
t.Run(e2e.OCIRootProfile.String(), func(t *testing.T) {
c.actionFlags(t, e2e.OCIRootProfile, "oci-archive:"+c.env.OCIImagePath)
})
}

func (c *ctx) actionFlagsRootless(t *testing.T) {
e2e.EnsureImage(t, c.env)
e2e.EnsureOCIImage(t, c.env)
for _, profile := range []e2e.Profile{e2e.UserProfile, e2e.UserNamespaceProfile, e2e.FakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
c.actionFlags(t, profile)
c.actionFlags(t, profile, c.env.ImagePath)
})
}
for _, profile := range []e2e.Profile{e2e.OCIUserProfile, e2e.OCIFakerootProfile} {
t.Run(profile.String(), func(t *testing.T) {
c.actionFlags(t, profile, "oci-archive:"+c.env.OCIImagePath)
})
}
}
Expand Down
3 changes: 3 additions & 0 deletions e2e/env/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func (c ctx) ociSingularityEnv(t *testing.T) {
e2e.WithProfile(e2e.OCIUserProfile),
e2e.WithCommand("exec"),
e2e.WithEnv(tt.env),
e2e.WithRootlessEnv(),
e2e.WithArgs(tt.image, "/bin/sh", "-c", "echo $PATH"),
e2e.ExpectExit(
0,
Expand Down Expand Up @@ -181,6 +182,7 @@ func (c ctx) ociEnvOption(t *testing.T) {
e2e.WithProfile(e2e.OCIUserProfile),
e2e.WithCommand("exec"),
e2e.WithEnv(tt.hostEnv),
e2e.WithRootlessEnv(),
e2e.WithArgs(args...),
e2e.ExpectExit(
0,
Expand Down Expand Up @@ -283,6 +285,7 @@ func (c ctx) ociEnvFile(t *testing.T) {
e2e.WithProfile(e2e.OCIUserProfile),
e2e.WithCommand("exec"),
e2e.WithEnv(tt.hostEnv),
e2e.WithRootlessEnv(),
e2e.WithArgs(args...),
e2e.ExpectExit(
0,
Expand Down
14 changes: 14 additions & 0 deletions e2e/internal/e2e/profile.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ type Profile struct {
requirementsFn func(*testing.T) // function checking requirements for the profile
singularityOption string // option added to singularity command for the profile
optionForCommands []string // singularity commands concerned by the option to be added
oci bool // whether the profile uses the OCI low-level runtime
}

// NativeProfiles defines all available profiles for the native singularity runtime
Expand All @@ -77,6 +78,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: nil,
singularityOption: "",
optionForCommands: []string{},
oci: false,
},
rootProfile: {
name: "Root",
Expand All @@ -87,6 +89,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: nil,
singularityOption: "",
optionForCommands: []string{},
oci: false,
},
fakerootProfile: {
name: "Fakeroot",
Expand All @@ -97,6 +100,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: fakerootRequirements,
singularityOption: "--fakeroot",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start", "build"},
oci: false,
},
userNamespaceProfile: {
name: "UserNamespace",
Expand All @@ -107,6 +111,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: require.UserNamespace,
singularityOption: "--userns",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: false,
},
rootUserNamespaceProfile: {
name: "RootUserNamespace",
Expand All @@ -117,6 +122,7 @@ var NativeProfiles = map[string]Profile{
requirementsFn: require.UserNamespace,
singularityOption: "--userns",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: false,
},
}

Expand All @@ -131,6 +137,7 @@ var OCIProfiles = map[string]Profile{
requirementsFn: ociRequirements,
singularityOption: "--oci",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: true,
},
ociRootProfile: {
name: "OCIRoot",
Expand All @@ -141,6 +148,7 @@ var OCIProfiles = map[string]Profile{
requirementsFn: ociRequirements,
singularityOption: "--oci",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: true,
},
ociFakerootProfile: {
name: "OCIFakeroot",
Expand All @@ -151,6 +159,7 @@ var OCIProfiles = map[string]Profile{
requirementsFn: ociRequirements,
singularityOption: "--oci --fakeroot",
optionForCommands: []string{"shell", "exec", "run", "test", "instance start"},
oci: true,
},
}

Expand All @@ -160,6 +169,11 @@ func (p Profile) Privileged() bool {
return p.privileged
}

// OCI reports whether the profile uses the OCI low-level runtime, rather than
// the singularity native runtime. It returns the value of the profile's oci
// field unchanged.
func (p Profile) OCI() bool {
return p.oci
}

// Requirements calls the different require.* functions
// necessary for running an E2E test under this profile.
func (p Profile) Requirements(t *testing.T) {
Expand Down
14 changes: 14 additions & 0 deletions e2e/internal/e2e/singularitycmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,20 @@ func (env TestEnv) RunSingularity(t *testing.T, cmdOps ...SingularityCmdOp) {
cmd.Env = os.Environ()
}

// Clear user-specific DBUS / XDG vars when we are using a priv profile,
// as they don't make sense for the root user... and wouldn't be set in a
// real root user session.
if privileged {
i := 0
for _, e := range cmd.Env {
if !(strings.HasPrefix(e, "DBUS_SESSION_BUS_ADDRESS=") || strings.HasPrefix(e, "XDG_RUNTIME_DIR=")) {
cmd.Env[i] = e
i++
}
}
cmd.Env = cmd.Env[:i]
}

// By default, each E2E command shares a temporary image cache
// directory. If a test is directly testing the cache, or depends on
// specific ordered cache behavior then
Expand Down
Loading

0 comments on commit d0c2fa0

Please sign in to comment.