diff --git a/CHANGELOG.md b/CHANGELOG.md
index 41ddd18608..0738a21f22 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,9 @@
   `/var/singularity`) to store local state files.
 - `--cwd` is now the preferred form of the flag for setting the container's
   working directory, though `--pwd` is still supported for compatibility.
+- `--oci` mode now provides a writable container by default, using a tmpfs
+  overlay. This improves parity with `--compat` mode in the native runtime, as
+  `--compat` enables `--writable-tmpfs`.
 
 ### New Features & Functionality
 
diff --git a/cmd/internal/cli/oci_linux.go b/cmd/internal/cli/oci_linux.go
index 571fe4e407..8e3a58866d 100644
--- a/cmd/internal/cli/oci_linux.go
+++ b/cmd/internal/cli/oci_linux.go
@@ -107,6 +107,7 @@ func init() {
 	cmdManager.RegisterSubCmd(OciCmd, OciStartCmd)
 	cmdManager.RegisterSubCmd(OciCmd, OciCreateCmd)
 	cmdManager.RegisterSubCmd(OciCmd, OciRunCmd)
+	cmdManager.RegisterSubCmd(OciCmd, OciRunWrappedCmd)
 	cmdManager.RegisterSubCmd(OciCmd, OciDeleteCmd)
 	cmdManager.RegisterSubCmd(OciCmd, OciKillCmd)
 	cmdManager.RegisterSubCmd(OciCmd, OciStateCmd)
@@ -118,7 +119,7 @@ func init() {
 	cmdManager.RegisterSubCmd(OciCmd, OciMountCmd)
 	cmdManager.RegisterSubCmd(OciCmd, OciUmountCmd)
 
-	cmdManager.SetCmdGroup("create_run", OciCreateCmd, OciRunCmd)
+	cmdManager.SetCmdGroup("create_run", OciCreateCmd, OciRunCmd, OciRunWrappedCmd)
 	createRunCmd := cmdManager.GetCmdGroup("create_run")
 
 	cmdManager.RegisterFlagForCmd(&ociBundleFlag, createRunCmd...)
@@ -167,6 +168,25 @@ var OciRunCmd = &cobra.Command{
 	Example: docs.OciRunExample,
 }
 
+// OciRunWrappedCmd is for internal OCI launcher use.
+// Executes an oci run, wrapped with preparation / cleanup code.
+var OciRunWrappedCmd = &cobra.Command{
+	Args:                  cobra.ExactArgs(1),
+	DisableFlagsInUseLine: true,
+	PreRun:                CheckRoot,
+	Run: func(cmd *cobra.Command, args []string) {
+		if err := singularity.OciRunWrapped(cmd.Context(), args[0], &ociArgs); err != nil {
+			var exitErr *exec.ExitError
+			if errors.As(err, &exitErr) {
+				os.Exit(exitErr.ExitCode())
+			}
+			sylog.Fatalf("%s", err)
+		}
+	},
+	Use:    docs.OciRunWrappedUse,
+	Hidden: true,
+}
+
 // OciStartCmd represents oci start command.
 var OciStartCmd = &cobra.Command{
 	Args:                  cobra.ExactArgs(1),
diff --git a/docs/content.go b/docs/content.go
index cff5b24edd..11fba9305e 100644
--- a/docs/content.go
+++ b/docs/content.go
@@ -976,6 +976,9 @@ Enterprise Performance Computing (EPC)`
   $ singularity oci attach mycontainer
   $ singularity oci delete mycontainer`
 
+	// Internal oci launcher use only - no user-facing docs
+	OciRunWrappedUse string = `run-wrapped -b [run options...] `
+
 	OciUpdateUse   string = `update [update options...] `
 	OciUpdateShort string = `Update container cgroups resources (root user only)`
 	OciUpdateLong  string = `
diff --git a/e2e/actions/actions.go b/e2e/actions/actions.go
index 57671c6121..aa4671d375 100644
--- a/e2e/actions/actions.go
+++ b/e2e/actions/actions.go
@@ -2548,12 +2548,13 @@ func E2ETests(env e2e.TestEnv) testhelper.Tests {
 		//
 		// OCI Runtime Mode
 		//
-		"ociRun":     c.actionOciRun,     // singularity run --oci
-		"ociExec":    c.actionOciExec,    // singularity exec --oci
-		"ociShell":   c.actionOciShell,   // singularity shell --oci
-		"ociNetwork": c.actionOciNetwork, // singularity exec --oci --net
-		"ociBinds":   c.actionOciBinds,   // singularity exec --oci --bind / --mount
-		"ociCdi":     c.actionOciCdi,     // singularity exec --oci --cdi
-		"ociIDMaps":  c.actionOciIDMaps,  // check uid/gid mapping on host for --oci as user / --fakeroot
+		"ociRun":     c.actionOciRun,        // singularity run --oci
+		"ociExec":    c.actionOciExec,       // singularity exec --oci
+		"ociShell":   c.actionOciShell,      // singularity shell --oci
+		"ociNetwork": c.actionOciNetwork,    // singularity exec --oci --net
+		"ociBinds":   c.actionOciBinds,      // singularity exec --oci --bind / --mount
+		"ociCdi":     c.actionOciCdi,        // singularity exec --oci --cdi
+		"ociIDMaps":  c.actionOciIDMaps,     // check uid/gid mapping on host for --oci as user / --fakeroot
+		"ociCompat":  np(c.actionOciCompat), // --oci equivalence to native mode --compat
 	}
 }
diff --git a/e2e/actions/oci.go b/e2e/actions/oci.go
index 185577913d..27731719d0 100644
--- a/e2e/actions/oci.go
+++ b/e2e/actions/oci.go
@@ -12,6 +12,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"strings"
+	"syscall"
 	"testing"
 	"text/template"
 
@@ -908,3 +909,60 @@ func (c actionTests) actionOciIDMaps(t *testing.T) {
 		})
 	}
 }
+
+// actionOciCompat checks that the --oci mode has the behaviour that the native mode gains from the --compat flag.
+// Must be run in sequential section as it modifies host process umask.
+func (c actionTests) actionOciCompat(t *testing.T) {
+	e2e.EnsureOCIArchive(t, c.env)
+	imageRef := "oci-archive:" + c.env.OCIArchivePath
+
+	type test struct {
+		name     string
+		args     []string
+		exitCode int
+		expect   e2e.SingularityCmdResultOp
+	}
+
+	tests := []test{
+		{
+			name:     "containall",
+			args:     []string{imageRef, "sh", "-c", "ls -lah $HOME"},
+			exitCode: 0,
+			expect:   e2e.ExpectOutput(e2e.ContainMatch, "total 0"),
+		},
+		{
+			name:     "writable-tmpfs",
+			args:     []string{imageRef, "sh", "-c", "touch /test"},
+			exitCode: 0,
+		},
+		{
+			name:     "no-init",
+			args:     []string{imageRef, "sh", "-c", "ps"},
+			exitCode: 0,
+			expect:   e2e.ExpectOutput(e2e.UnwantedContainMatch, "sinit"),
+		},
+		{
+			name:     "no-umask",
+			args:     []string{imageRef, "sh", "-c", "umask"},
+			exitCode: 0,
+			expect:   e2e.ExpectOutput(e2e.ContainMatch, "0022"),
+		},
+	}
+
+	oldUmask := syscall.Umask(0)
+	defer syscall.Umask(oldUmask)
+
+	for _, tt := range tests {
+		c.env.RunSingularity(
+			t,
+			e2e.AsSubtest(tt.name),
+			e2e.WithProfile(e2e.OCIUserProfile),
+			e2e.WithCommand("exec"),
+			e2e.WithArgs(tt.args...),
+			e2e.ExpectExit(
+				tt.exitCode,
+				tt.expect,
+			),
+		)
+	}
+}
diff --git a/internal/app/singularity/oci_linux.go b/internal/app/singularity/oci_linux.go
index 1c8ba04695..e59c427914 100644
--- a/internal/app/singularity/oci_linux.go
+++ b/internal/app/singularity/oci_linux.go
@@ -42,6 +42,15 @@ func OciRun(ctx context.Context, containerID string, args *OciArgs) error {
 	return oci.Run(ctx, containerID, args.BundlePath, args.PidFile, systemdCgroups)
 }
 
+// OciRunWrapped runs a container via the OCI runtime, wrapped with prep / cleanup steps.
+func OciRunWrapped(ctx context.Context, containerID string, args *OciArgs) error {
+	systemdCgroups, err := systemdCgroups()
+	if err != nil {
+		return err
+	}
+	return oci.RunWrapped(ctx, containerID, args.BundlePath, args.PidFile, systemdCgroups)
+}
+
 // OciCreate creates a container from an OCI bundle
 func OciCreate(containerID string, args *OciArgs) error {
 	systemdCgroups, err := systemdCgroups()
diff --git a/internal/pkg/runtime/launcher/oci/launcher_linux.go b/internal/pkg/runtime/launcher/oci/launcher_linux.go
index deef5c3799..c3af777e2d 100644
--- a/internal/pkg/runtime/launcher/oci/launcher_linux.go
+++ b/internal/pkg/runtime/launcher/oci/launcher_linux.go
@@ -79,7 +79,7 @@ func checkOpts(lo launcher.Options) error {
 		badOpt = append(badOpt, "Writable")
 	}
 	if lo.WritableTmpfs {
-		badOpt = append(badOpt, "WritableTmpfs")
+		sylog.Infof("--oci mode uses --writable-tmpfs by default")
 	}
 	if len(lo.OverlayPaths) > 0 {
 		badOpt = append(badOpt, "OverlayPaths")
@@ -473,12 +473,12 @@ func (l *Launcher) Exec(ctx context.Context, image string, process string, args
 	}
 
 	if os.Getuid() == 0 {
-		// Direct execution of runc/crun run.
-		err = Run(ctx, id.String(), b.Path(), "", l.singularityConf.SystemdCgroups)
+		// Execution of runc/crun run, wrapped with prep / cleanup.
+		err = RunWrapped(ctx, id.String(), b.Path(), "", l.singularityConf.SystemdCgroups)
 	} else {
 		// Reexec singularity oci run in a userns with mappings.
 		// Note - the oci run command will pull out the SystemdCgroups setting from config.
-		err = RunNS(ctx, id.String(), b.Path(), "")
+		err = RunWrappedNS(ctx, id.String(), b.Path(), "")
 	}
 	var exitErr *exec.ExitError
 	if errors.As(err, &exitErr) {
diff --git a/internal/pkg/runtime/launcher/oci/oci_runc_linux.go b/internal/pkg/runtime/launcher/oci/oci_runc_linux.go
index e2738b9862..8268643d88 100644
--- a/internal/pkg/runtime/launcher/oci/oci_runc_linux.go
+++ b/internal/pkg/runtime/launcher/oci/oci_runc_linux.go
@@ -19,8 +19,10 @@ import (
 	"github.com/sylabs/singularity/internal/pkg/buildcfg"
 	fakerootConfig "github.com/sylabs/singularity/internal/pkg/runtime/engine/fakeroot/config"
 	"github.com/sylabs/singularity/internal/pkg/util/starter"
+	"github.com/sylabs/singularity/pkg/ocibundle/tools"
 	"github.com/sylabs/singularity/pkg/runtime/engine/config"
 	"github.com/sylabs/singularity/pkg/sylog"
+	"github.com/sylabs/singularity/pkg/util/singularityconf"
 )
 
 // Delete deletes container resources
@@ -219,8 +221,25 @@ func Run(ctx context.Context, containerID, bundlePath, pidFile string, systemdCg
 	return cmd.Run()
 }
 
-// RunNS reexecs singularity in a user namespace, with supplied uid/gid mapping, calling oci run.
-func RunNS(ctx context.Context, containerID, bundlePath, pidFile string) error {
+// RunWrapped runs a container via the OCI runtime, wrapped with prep / cleanup steps.
+func RunWrapped(ctx context.Context, containerID, bundlePath, pidFile string, systemdCgroups bool) error {
+	// TODO: --oci mode always emulating --compat, which uses --writable-tmpfs.
+	// Provide a way of disabling this, for a read only rootfs.
+	if err := prepareWriteableTmpfs(bundlePath); err != nil {
+		return err
+	}
+
+	runErr := Run(ctx, containerID, bundlePath, pidFile, systemdCgroups)
+
+	// Cleanup errors are logged, not returned: all cleanup runs, and callers still see the payload's error / exit code.
+	if err := cleanupWritableTmpfs(bundlePath); err != nil {
+		sylog.Errorf("While cleaning up writable tmpfs: %v", err)
+	}
+	return runErr
+}
+
+// RunWrappedNS reexecs singularity in a user namespace, with supplied uid/gid mapping, calling oci run-wrapped.
+func RunWrappedNS(ctx context.Context, containerID, bundlePath, pidFile string) error {
 	absBundle, err := filepath.Abs(bundlePath)
 	if err != nil {
 		return fmt.Errorf("failed to determine bundle absolute path: %s", err)
@@ -233,7 +252,7 @@ func RunNS(ctx context.Context, containerID, bundlePath, pidFile string) error {
 	args := []string{
 		filepath.Join(buildcfg.BINDIR, "singularity"),
 		"oci",
-		"run",
+		"run-wrapped",
 		"-b", absBundle,
 		containerID,
 	}
@@ -342,3 +361,19 @@ func Update(containerID, cgFile string, systemdCgroups bool) error {
 	sylog.Debugf("Calling %s with args %v", runtimeBin, runtimeArgs)
 	return cmd.Run()
 }
+
+// prepareWriteableTmpfs sets up a tmpfs-backed overlay for the bundle, sized from the SessiondirMaxSize config value.
+func prepareWriteableTmpfs(bundleDir string) error {
+	sylog.Debugf("Configuring writable tmpfs overlay for %s", bundleDir)
+	c := singularityconf.GetCurrentConfig()
+	if c == nil {
+		return fmt.Errorf("singularity configuration is not initialized")
+	}
+	return tools.CreateOverlayTmpfs(bundleDir, int(c.SessiondirMaxSize))
+}
+
+// cleanupWritableTmpfs tears down the bundle's overlay via tools.DeleteOverlay.
+func cleanupWritableTmpfs(bundleDir string) error {
+	sylog.Debugf("Cleaning up writable tmpfs overlay for %s", bundleDir)
+	return tools.DeleteOverlay(bundleDir)
+}
diff --git a/internal/pkg/runtime/launcher/oci/spec_linux.go b/internal/pkg/runtime/launcher/oci/spec_linux.go
index cd8beadbe7..9b37bd3ee4 100644
--- a/internal/pkg/runtime/launcher/oci/spec_linux.go
+++ b/internal/pkg/runtime/launcher/oci/spec_linux.go
@@ -36,8 +36,8 @@ func minimalSpec() specs.Spec {
 	}
 	config.Root = &specs.Root{
 		Path: "rootfs",
-		// TODO - support writable-tmpfs / writable
-		Readonly: true,
+		// TODO - support read-only. At present we always have a writable tmpfs overlay, like native runtime --compat.
+		Readonly: false,
 	}
 	config.Process = &specs.Process{
 		Terminal: true,
diff --git a/pkg/ocibundle/tools/overlay_linux.go b/pkg/ocibundle/tools/overlay_linux.go
index e98c567075..c0a65cf7d4 100644
--- a/pkg/ocibundle/tools/overlay_linux.go
+++ b/pkg/ocibundle/tools/overlay_linux.go
@@ -12,7 +12,7 @@ import (
 	"syscall"
 )
 
-// CreateOverlay creates a writable overlay
+// CreateOverlay creates a writable overlay based on a directory.
 func CreateOverlay(bundlePath string) error {
 	var err error
 
@@ -45,18 +45,61 @@ func CreateOverlay(bundlePath string) error {
 		return fmt.Errorf("failed to remount %s: %s", overlayDir, err)
 	}
 
+	err = prepareOverlay(bundlePath, overlayDir)
+	return err
+}
+
+// CreateOverlayTmpfs creates a writable overlay based on a tmpfs.
+func CreateOverlayTmpfs(bundlePath string, sizeMiB int) error {
+	var err error
+
+	oldumask := syscall.Umask(0)
+	defer syscall.Umask(oldumask)
+
+	overlayDir := filepath.Join(bundlePath, "overlay")
+	if err = os.Mkdir(overlayDir, 0o700); err != nil {
+		return fmt.Errorf("failed to create %s: %s", overlayDir, err)
+	}
+	// delete overlay directory in case of error
+	defer func() {
+		if err != nil {
+			os.RemoveAll(overlayDir)
+		}
+	}()
+
+	options := fmt.Sprintf("mode=1777,size=%dm", sizeMiB)
+	err = syscall.Mount("tmpfs", overlayDir, "tmpfs", syscall.MS_NODEV, options)
+	if err != nil {
+		return fmt.Errorf("failed to mount tmpfs %s: %s", overlayDir, err)
+	}
+	// best effort to cleanup mount
+	defer func() {
+		if err != nil {
+			syscall.Unmount(overlayDir, syscall.MNT_DETACH)
+		}
+	}()
+
+	if err = syscall.Mount("", overlayDir, "", syscall.MS_REMOUNT, ""); err != nil {
+		return fmt.Errorf("failed to remount %s: %s", overlayDir, err)
+	}
+
+	err = prepareOverlay(bundlePath, overlayDir)
+	return err
+}
+
+func prepareOverlay(bundlePath, overlayDir string) error {
 	upperDir := filepath.Join(overlayDir, "upper")
-	if err = os.Mkdir(upperDir, 0o755); err != nil {
+	if err := os.Mkdir(upperDir, 0o755); err != nil {
 		return fmt.Errorf("failed to create %s: %s", upperDir, err)
 	}
 	workDir := filepath.Join(overlayDir, "work")
-	if err = os.Mkdir(workDir, 0o700); err != nil {
+	if err := os.Mkdir(workDir, 0o700); err != nil {
 		return fmt.Errorf("failed to create %s: %s", workDir, err)
 	}
 	rootFsDir := RootFs(bundlePath).Path()
 	options := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", rootFsDir, upperDir, workDir)
-	if err = syscall.Mount("overlay", rootFsDir, "overlay", 0, options); err != nil {
+	if err := syscall.Mount("overlay", rootFsDir, "overlay", 0, options); err != nil {
 		return fmt.Errorf("failed to mount %s: %s", overlayDir, err)
 	}
 	return nil