Skip to content

Commit

Permalink
oci: Enable --writable-tmpfs behaviour by default
Browse files Browse the repository at this point in the history
The `--oci` mode intends to follow behaviour that the native runtime
implements when run with `--compat`.

One missing aspect is that `--compat` sets `--writable-tmpfs`, where
the container rootfs is made writable with a tmpfs backed overlay.

This PR:

- Introduces a simple wrapping of the `oci run` sub-command as `oci
  run-wrapped`. This hidden command implements prep / cleanup steps
  that must take place in a userns for non-root `--oci` execution.
- Switches the oci launcher to calling `oci run-wrapped` instead of
  `oci run`.
- Adds a tmpfs based overlay creation function for OCI
  bundles.
- Includes the tmpfs overlay creation in the `oci run-wrapped` flow.
- Copies the native runtime `--compat` e2e tests to OCI mode.

Fixes sylabs/singularity#1621

Signed-off-by: Edita Kizinevic <edita.kizinevic@cern.ch>
  • Loading branch information
dtrudg authored and edytuk committed May 10, 2023
1 parent 0fad897 commit ec1c416
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 21 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ For older changes see the [archived Singularity change log](https://github.com/a
`--timeout` have been removed.
- `sessiondir maxsize` in `apptainer.conf` now defaults to 64 MiB for new
installations. This is an increase from 16 MiB in prior versions.
- `--oci` mode now provides a writable container by default, using a tmpfs
overlay. This improves parity with `--compat` mode in the native runtime, as
`--compat` enables `--writable-tmpfs`.

### New features / functionalities

Expand Down
22 changes: 21 additions & 1 deletion cmd/internal/cli/oci_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func init() {
cmdManager.RegisterSubCmd(OciCmd, OciStartCmd)
cmdManager.RegisterSubCmd(OciCmd, OciCreateCmd)
cmdManager.RegisterSubCmd(OciCmd, OciRunCmd)
cmdManager.RegisterSubCmd(OciCmd, OciRunWrappedCmd)
cmdManager.RegisterSubCmd(OciCmd, OciDeleteCmd)
cmdManager.RegisterSubCmd(OciCmd, OciKillCmd)
cmdManager.RegisterSubCmd(OciCmd, OciStateCmd)
Expand All @@ -122,7 +123,7 @@ func init() {
cmdManager.RegisterSubCmd(OciCmd, OciMountCmd)
cmdManager.RegisterSubCmd(OciCmd, OciUmountCmd)

cmdManager.SetCmdGroup("create_run", OciCreateCmd, OciRunCmd)
cmdManager.SetCmdGroup("create_run", OciCreateCmd, OciRunCmd, OciRunWrappedCmd)
createRunCmd := cmdManager.GetCmdGroup("create_run")

cmdManager.RegisterFlagForCmd(&ociBundleFlag, createRunCmd...)
Expand Down Expand Up @@ -171,6 +172,25 @@ var OciRunCmd = &cobra.Command{
Example: docs.OciRunExample,
}

// OciRunWrappedCmd is a hidden command reserved for the internal OCI launcher.
// It performs an oci run surrounded by preparation / cleanup code.
var OciRunWrappedCmd = &cobra.Command{
	Use:                   docs.OciRunWrappedUse,
	Hidden:                true,
	Args:                  cobra.ExactArgs(1),
	DisableFlagsInUseLine: true,
	PreRun:                CheckRoot,
	Run: func(cmd *cobra.Command, args []string) {
		containerID := args[0]
		runErr := apptainer.OciRunWrapped(cmd.Context(), containerID, &ociArgs)
		if runErr == nil {
			return
		}
		// If the error carries a process exit code, exit with that same code.
		var exitErr *exec.ExitError
		if errors.As(runErr, &exitErr) {
			os.Exit(exitErr.ExitCode())
		}
		// Any other failure is fatal.
		sylog.Fatalf("%s", runErr)
	},
}

// OciStartCmd represents oci start command.
var OciStartCmd = &cobra.Command{
Args: cobra.ExactArgs(1),
Expand Down
3 changes: 3 additions & 0 deletions docs/content.go
Original file line number Diff line number Diff line change
Expand Up @@ -989,6 +989,9 @@ Enterprise Performance Computing (EPC)`
$ apptainer oci attach mycontainer
$ apptainer oci delete mycontainer`

// Internal oci launcher use only - no user-facing docs
OciRunWrappedUse string = `run-wrapped -b <bundle_path> [run options...] <container_ID>`

OciUpdateUse string = `update [update options...] <container_ID>`
OciUpdateShort string = `Update container cgroups resources (root user only)`
OciUpdateLong string = `
Expand Down
15 changes: 8 additions & 7 deletions e2e/actions/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -2614,12 +2614,13 @@ func E2ETests(env e2e.TestEnv) testhelper.Tests {
//
// OCI Runtime Mode
//
"ociRun": c.actionOciRun, // apptainer run --oci
"ociExec": c.actionOciExec, // apptainer exec --oci
"ociShell": c.actionOciShell, // apptainer shell --oci
"ociNetwork": c.actionOciNetwork, // apptainer exec --oci --net
"ociBinds": c.actionOciBinds, // apptainer exec --oci --bind / --mount
"ociCdi": c.actionOciCdi, // apptainer exec --oci --cdi
"ociIDMaps": c.actionOciIDMaps, // check uid/gid mapping on host for --oci as user / --fakeroot
"ociRun": c.actionOciRun, // apptainer run --oci
"ociExec": c.actionOciExec, // apptainer exec --oci
"ociShell": c.actionOciShell, // apptainer shell --oci
"ociNetwork": c.actionOciNetwork, // apptainer exec --oci --net
"ociBinds": c.actionOciBinds, // apptainer exec --oci --bind / --mount
"ociCdi": c.actionOciCdi, // apptainer exec --oci --cdi
"ociIDMaps": c.actionOciIDMaps, // check uid/gid mapping on host for --oci as user / --fakeroot
"ociCompat": np(c.actionOciCompat), // --oci equivalence to native mode --compat
}
}
58 changes: 58 additions & 0 deletions e2e/actions/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"os/exec"
"path/filepath"
"strings"
"syscall"
"testing"
"text/template"

Expand Down Expand Up @@ -894,3 +895,60 @@ func (c actionTests) actionOciIDMaps(t *testing.T) {
})
}
}

// actionOciCompat verifies that --oci mode shows the behavior that native
// mode gains from the --compat flag. It sets the host process umask to 0 for
// the duration of the test, so it must be run in the sequential section.
func (c actionTests) actionOciCompat(t *testing.T) {
	e2e.EnsureOCIArchive(t, c.env)
	imageRef := "oci-archive:" + c.env.OCIArchivePath

	cases := []struct {
		name     string
		args     []string
		exitCode int
		expect   e2e.ApptainerCmdResultOp
	}{
		{
			name:     "containall",
			args:     []string{imageRef, "sh", "-c", "ls -lah $HOME"},
			exitCode: 0,
			expect:   e2e.ExpectOutput(e2e.ContainMatch, "total 0"),
		},
		{
			// Only the exit code is checked: touch must succeed on the rootfs.
			name:     "writable-tmpfs",
			args:     []string{imageRef, "sh", "-c", "touch /test"},
			exitCode: 0,
		},
		{
			name:     "no-init",
			args:     []string{imageRef, "sh", "-c", "ps"},
			exitCode: 0,
			expect:   e2e.ExpectOutput(e2e.UnwantedContainMatch, "sinit"),
		},
		{
			name:     "no-umask",
			args:     []string{imageRef, "sh", "-c", "umask"},
			exitCode: 0,
			expect:   e2e.ExpectOutput(e2e.ContainMatch, "0022"),
		},
	}

	// Run with a host umask of 0, restoring the previous value on return.
	previous := syscall.Umask(0)
	defer syscall.Umask(previous)

	for i := range cases {
		tc := cases[i]
		c.env.RunApptainer(
			t,
			e2e.AsSubtest(tc.name),
			e2e.WithProfile(e2e.OCIUserProfile),
			e2e.WithCommand("exec"),
			e2e.WithArgs(tc.args...),
			e2e.ExpectExit(tc.exitCode, tc.expect),
		)
	}
}
9 changes: 9 additions & 0 deletions internal/app/apptainer/oci_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ func OciRun(ctx context.Context, containerID string, args *OciArgs) error {
return oci.Run(ctx, containerID, args.BundlePath, args.PidFile, systemdCgroups)
}

// OciRunWrapped runs a container via the OCI runtime, wrapped with prep / cleanup steps.
//
// containerID identifies the container to run, and args supplies the bundle
// path and pid file that are passed on to oci.RunWrapped. An error from
// systemdCgroups() is returned as-is; otherwise the result of oci.RunWrapped
// is returned unchanged.
func OciRunWrapped(ctx context.Context, containerID string, args *OciArgs) error {
	// Use a distinct local name so the systemdCgroups helper is not shadowed.
	useSystemdCgroups, err := systemdCgroups()
	if err != nil {
		return err
	}
	return oci.RunWrapped(ctx, containerID, args.BundlePath, args.PidFile, useSystemdCgroups)
}

// OciCreate creates a container from an OCI bundle
func OciCreate(containerID string, args *OciArgs) error {
systemdCgroups, err := systemdCgroups()
Expand Down
8 changes: 4 additions & 4 deletions internal/pkg/runtime/launcher/oci/launcher_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func checkOpts(lo launcher.Options) error {
badOpt = append(badOpt, "Writable")
}
if lo.WritableTmpfs {
badOpt = append(badOpt, "WritableTmpfs")
sylog.Infof("--oci mode uses --writable-tmpfs by default")
}
if len(lo.OverlayPaths) > 0 {
badOpt = append(badOpt, "OverlayPaths")
Expand Down Expand Up @@ -474,12 +474,12 @@ func (l *Launcher) Exec(ctx context.Context, image string, process string, args
}

if os.Getuid() == 0 {
// Direct execution of runc/crun run.
err = Run(ctx, id.String(), b.Path(), "", l.apptainerConf.SystemdCgroups)
// Execution of runc/crun run, wrapped with prep / cleanup.
err = RunWrapped(ctx, id.String(), b.Path(), "", l.apptainerConf.SystemdCgroups)
} else {
// Reexec apptainer oci run-wrapped in a userns with mappings.
// Note - the oci run-wrapped command will pull out the SystemdCgroups setting from config.
err = RunNS(ctx, id.String(), b.Path(), "")
err = RunWrappedNS(ctx, id.String(), b.Path(), "")
}
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
Expand Down
41 changes: 38 additions & 3 deletions internal/pkg/runtime/launcher/oci/oci_runc_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ import (
"path/filepath"
"strings"

"github.com/apptainer/apptainer/pkg/util/apptainerconf"
"github.com/apptainer/apptainer/internal/pkg/buildcfg"
fakerootConfig "github.com/apptainer/apptainer/internal/pkg/runtime/engine/fakeroot/config"
"github.com/apptainer/apptainer/internal/pkg/util/starter"
"github.com/apptainer/apptainer/pkg/ocibundle/tools"
"github.com/apptainer/apptainer/pkg/runtime/engine/config"
"github.com/apptainer/apptainer/pkg/sylog"
)
Expand Down Expand Up @@ -223,8 +225,27 @@ func Run(ctx context.Context, containerID, bundlePath, pidFile string, systemdCg
return cmd.Run()
}

// RunNS reexecs apptainer in a user namespace, with supplied uid/gid mapping, calling oci run.
func RunNS(ctx context.Context, containerID, bundlePath, pidFile string) error {
// RunWrapped performs a prep step, runs the container through the OCI runtime
// via Run, and then performs a cleanup step.
//
// A prep failure is returned immediately and the container is not run. A
// cleanup failure is only logged. The value returned is whatever Run returned.
func RunWrapped(ctx context.Context, containerID, bundlePath, pidFile string, systemdCgroups bool) error {
	// TODO: --oci mode currently always emulates --compat, which uses
	// --writable-tmpfs. Provide a way of disabling this, for a read only rootfs.
	prepErr := prepareWriteableTmpfs(bundlePath)
	if prepErr != nil {
		return prepErr
	}

	runErr := Run(ctx, containerID, bundlePath, pidFile, systemdCgroups)

	// Cleanup problems are logged rather than returned, so that as much
	// cleanup as possible gets done.
	cleanupErr := cleanupWritableTmpfs(bundlePath)
	if cleanupErr != nil {
		sylog.Errorf("While cleaning up writable tmpfs: %v", cleanupErr)
	}

	// Hand back the error from the container run itself, so that its exit
	// code is preserved for the caller.
	return runErr
}

// RunWrappedNS reexecs apptainer in a user namespace, with supplied uid/gid mapping, calling oci run-wrapped.
func RunWrappedNS(ctx context.Context, containerID, bundlePath, pidFile string) error {
absBundle, err := filepath.Abs(bundlePath)
if err != nil {
return fmt.Errorf("failed to determine bundle absolute path: %s", err)
Expand All @@ -237,7 +258,7 @@ func RunNS(ctx context.Context, containerID, bundlePath, pidFile string) error {
args := []string{
filepath.Join(buildcfg.BINDIR, "apptainer"),
"oci",
"run",
"run-wrapped",
"-b", absBundle,
containerID,
}
Expand Down Expand Up @@ -346,3 +367,17 @@ func Update(containerID, cgFile string, systemdCgroups bool) error {
sylog.Debugf("Calling %s with args %v", runtimeBin, runtimeArgs)
return cmd.Run()
}

// prepareWriteableTmpfs configures a writable tmpfs overlay for the bundle at
// bundleDir by calling tools.CreateOverlayTmpfs, passing the SessiondirMaxSize
// value of the current apptainer configuration as the size. It returns an
// error if no current configuration is available.
func prepareWriteableTmpfs(bundleDir string) error {
	sylog.Debugf("Configuring writable tmpfs overlay for %s", bundleDir)

	conf := apptainerconf.GetCurrentConfig()
	if conf != nil {
		sizeMiB := int(conf.SessiondirMaxSize)
		return tools.CreateOverlayTmpfs(bundleDir, sizeMiB)
	}
	return fmt.Errorf("apptainer configuration is not initialized")
}

// cleanupWritableTmpfs is the counterpart of prepareWriteableTmpfs: it hands
// the bundle at bundleDir to tools.DeleteOverlay and returns its result.
func cleanupWritableTmpfs(bundleDir string) error {
	sylog.Debugf("Cleaning up writable tmpfs overlay for %s", bundleDir)

	err := tools.DeleteOverlay(bundleDir)
	return err
}
4 changes: 2 additions & 2 deletions internal/pkg/runtime/launcher/oci/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ func minimalSpec() specs.Spec {
}
config.Root = &specs.Root{
Path: "rootfs",
// TODO - support writable-tmpfs / writable
Readonly: true,
// TODO - support read-only. At present we always have a writable tmpfs overlay, like native runtime --compat.
Readonly: false,
}
config.Process = &specs.Process{
Terminal: true,
Expand Down
47 changes: 43 additions & 4 deletions pkg/ocibundle/tools/overlay_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
"syscall"
)

// CreateOverlay creates a writable overlay
// CreateOverlay creates a writable overlay based on a directory.
func CreateOverlay(bundlePath string) error {
var err error

Expand Down Expand Up @@ -49,18 +49,57 @@ func CreateOverlay(bundlePath string) error {
return fmt.Errorf("failed to remount %s: %s", overlayDir, err)
}

err = prepareOverlay(bundlePath, overlayDir)
return err
}

// CreateOverlayTmpfs creates a writable overlay based on a tmpfs.
//
// A directory named "overlay" is created inside bundlePath, a tmpfs limited
// to sizeMiB MiB is mounted on it, and prepareOverlay is then called with it.
// If the tmpfs mount or prepareOverlay fails, the tmpfs is lazily unmounted
// (best effort) and the overlay directory is removed before the error is
// returned.
func CreateOverlayTmpfs(bundlePath string, sizeMiB int) error {
	// err is shared with the deferred cleanups below, which only act when it
	// is non-nil at return time.
	var err error

	// Clear the umask while creating directories, restoring it on return.
	oldumask := syscall.Umask(0)
	defer syscall.Umask(oldumask)

	overlayDir := filepath.Join(bundlePath, "overlay")
	if err = os.Mkdir(overlayDir, 0o700); err != nil {
		return fmt.Errorf("failed to create %s: %w", overlayDir, err)
	}
	// Delete overlay directory in case of error. Deferred calls run in
	// reverse order, so this runs after the unmount registered below.
	defer func() {
		if err != nil {
			os.RemoveAll(overlayDir)
		}
	}()

	options := fmt.Sprintf("mode=1777,size=%dm", sizeMiB)
	err = syscall.Mount("tmpfs", overlayDir, "tmpfs", syscall.MS_NODEV, options)
	if err != nil {
		return fmt.Errorf("failed to mount tmpfs on %s: %w", overlayDir, err)
	}
	// Best effort to cleanup mount.
	defer func() {
		if err != nil {
			syscall.Unmount(overlayDir, syscall.MNT_DETACH)
		}
	}()

	err = prepareOverlay(bundlePath, overlayDir)
	return err
}

func prepareOverlay(bundlePath, overlayDir string) error {
upperDir := filepath.Join(overlayDir, "upper")
if err = os.Mkdir(upperDir, 0o755); err != nil {
if err := os.Mkdir(upperDir, 0o755); err != nil {
return fmt.Errorf("failed to create %s: %s", upperDir, err)
}
workDir := filepath.Join(overlayDir, "work")
if err = os.Mkdir(workDir, 0o700); err != nil {
if err := os.Mkdir(workDir, 0o700); err != nil {
return fmt.Errorf("failed to create %s: %s", workDir, err)
}
rootFsDir := RootFs(bundlePath).Path()

options := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", rootFsDir, upperDir, workDir)
if err = syscall.Mount("overlay", rootFsDir, "overlay", 0, options); err != nil {
if err := syscall.Mount("overlay", rootFsDir, "overlay", 0, options); err != nil {
return fmt.Errorf("failed to mount %s: %s", overlayDir, err)
}
return nil
Expand Down

0 comments on commit ec1c416

Please sign in to comment.