Skip to content

Commit

Permalink
*: actually support joining a userns with a new container
Browse files Browse the repository at this point in the history
(This is a cherry-pick of 1912d59.)

Our handling for name space paths with user namespaces has been broken
for a long time. In particular, the need to parse /proc/self/*id_map in
quite a few places meant that we would treat userns configurations that
had a namespace path as if they were a userns configuration without
mappings, resulting in errors.

The primary issue was down to the id translation helper functions, which
could only handle configurations that had explicit mappings. Obviously,
when joining a user namespace we need to map the ids but figuring out
the correct mapping is non-trivial in comparison.

In order to get the mapping, you need to read /proc/<pid>/*id_map of a
process inside the userns -- while most userns paths will be of the form
/proc/<pid>/ns/user (and we have a fast-path for this case), this is not
guaranteed and thus it is necessary to spawn a process inside the
container and read its /proc/<pid>/*id_map files in the general case.

As Go does not allow us spawn a subprocess into a target userns,
we have to use CGo to fork a sub-process which does the setns(2). To be
honest, this is a little dodgy in regards to POSIX signal-safety(7) but
since we do no allocations and we are executing in the forked context
from a Go program (not a C program), it should be okay. The other
alternative would be to do an expensive re-exec (a-la nsexec which would
make several other bits of runc more complicated), or to use nsenter(1)
which might not exist on the system and is less than ideal.

Because we need to logically remap users quite a few times in runc
(including in "runc init", where joining the namespace is not feasable),
we cache the mapping inside the libcontainer config struct. A future
patch will make sure that we stop allow invalid user configurations
where a mapping is specified as well as a userns path to join.

Finally, add an integration test to make sure we don't regress this again.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
  • Loading branch information
cyphar committed Dec 14, 2023
1 parent 75d99b4 commit 0c8e2cc
Show file tree
Hide file tree
Showing 5 changed files with 360 additions and 20 deletions.
31 changes: 12 additions & 19 deletions libcontainer/configs/validate/rootless.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,18 @@ func (v *ConfigValidator) rootlessEUID(config *configs.Config) error {
return nil
}

func hasIDMapping(id int, mappings []configs.IDMap) bool {
for _, m := range mappings {
if id >= m.ContainerID && id < m.ContainerID+m.Size {
return true
}
}
return false
}

func rootlessEUIDMappings(config *configs.Config) error {
if !config.Namespaces.Contains(configs.NEWUSER) {
return errors.New("rootless container requires user namespaces")
}

if len(config.UidMappings) == 0 {
return errors.New("rootless containers requires at least one UID mapping")
}
if len(config.GidMappings) == 0 {
return errors.New("rootless containers requires at least one GID mapping")
// We only require mappings if we are not joining another userns.
if path := config.Namespaces.PathOf(configs.NEWUSER); path == "" {
if len(config.UidMappings) == 0 {
return errors.New("rootless containers requires at least one UID mapping")
}
if len(config.GidMappings) == 0 {
return errors.New("rootless containers requires at least one GID mapping")
}
}
return nil
}
Expand All @@ -70,8 +63,8 @@ func rootlessEUIDMount(config *configs.Config) error {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(uid, config.UidMappings) {
return errors.New("cannot specify uid= mount options for unmapped uid in rootless containers")
if _, err := config.HostUID(uid); err != nil {
return fmt.Errorf("cannot specify uid=%d mount option for rootless container: %w", uid, err)
}
}

Expand All @@ -82,8 +75,8 @@ func rootlessEUIDMount(config *configs.Config) error {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(gid, config.GidMappings) {
return errors.New("cannot specify gid= mount options for unmapped gid in rootless containers")
if _, err := config.HostGID(gid); err != nil {
return fmt.Errorf("cannot specify gid=%d mount option for rootless container: %w", gid, err)
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/userns"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -939,6 +940,21 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
config.GidMappings = append(config.GidMappings, create(m))
}
}
if path := config.Namespaces.PathOf(configs.NEWUSER); path != "" {
// Cache the current userns mappings in our configuration, so that we
// can calculate uid and gid mappings within runc. These mappings are
// never used for configuring the container if the path is set.
uidMap, gidMap, err := userns.GetUserNamespaceMappings(path)
if err != nil {
return fmt.Errorf("failed to cache mappings for userns: %w", err)
}
config.UidMappings = uidMap
config.GidMappings = gidMap
logrus.WithFields(logrus.Fields{
"uid_map": uidMap,
"gid_map": gidMap,
}).Debugf("config uses path-based userns configuration -- current uid and gid mappings cached")
}
rootUID, err := config.HostRootUID()
if err != nil {
return err
Expand Down
79 changes: 79 additions & 0 deletions libcontainer/userns/userns_maps.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <stdlib.h>

/*
* All of the code here is run inside an aync-signal-safe context, so we need
* to be careful to not call any functions that could cause issues. In theory,
* since we are a Go program, there are fewer restrictions in practice, it's
* better to be safe than sorry.
*
* The only exception is exit, which we need to call to make sure we don't
* return into runc.
*/

void bail(int pipefd, const char *fmt, ...)
{
va_list args;

va_start(args, fmt);
vdprintf(pipefd, fmt, args);
va_end(args);

exit(1);
}

int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd)
{
char buffer[4096] = { 0 };

pid_t child = fork();
if (child != 0)
return child;
/* in child */

/* Join the target userns. */
int nsfd = open(userns_path, O_RDONLY);
if (nsfd < 0)
bail(errfd, "open userns path %s failed: %m", userns_path);

int err = setns(nsfd, CLONE_NEWUSER);
if (err < 0)
bail(errfd, "setns %s failed: %m", userns_path);

close(nsfd);

/* Pipe the requested file contents. */
int fd = open(path, O_RDONLY);
if (fd < 0)
bail(errfd, "open %s in userns %s failed: %m", path, userns_path);

int nread, ntotal = 0;
while ((nread = read(fd, buffer, sizeof(buffer))) != 0) {
if (nread < 0)
bail(errfd, "read bytes from %s failed (after %d total bytes read): %m", path, ntotal);
ntotal += nread;

int nwritten = 0;
while (nwritten < nread) {
int n = write(outfd, buffer, nread - nwritten);
if (n < 0)
bail(errfd, "write %d bytes from %s failed (after %d bytes written): %m",
nread - nwritten, path, nwritten);
nwritten += n;
}
if (nread != nwritten)
bail(errfd, "mismatch for bytes read and written: %d read != %d written", nread, nwritten);
}

close(fd);
close(outfd);
close(errfd);

/* We must exit here, otherwise we would return into a forked runc. */
exit(0);
}
171 changes: 171 additions & 0 deletions libcontainer/userns/userns_maps_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
//go:build linux

package userns

import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"unsafe"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
)

/*
#include <stdlib.h>
extern int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd);
*/
import "C"

func parseIdmapData(data []byte) (ms []configs.IDMap, err error) {
scanner := bufio.NewScanner(bytes.NewReader(data))
for scanner.Scan() {
var m configs.IDMap
line := scanner.Text()
if _, err := fmt.Sscanf(line, "%d %d %d", &m.ContainerID, &m.HostID, &m.Size); err != nil {
return nil, fmt.Errorf("parsing id map failed: invalid format in line %q: %w", line, err)
}
ms = append(ms, m)
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("parsing id map failed: %w", err)
}
return ms, nil
}

// Do something equivalent to nsenter --user=<nsPath> cat <path>, but more
// efficiently. Returns the contents of the requested file from within the user
// namespace.
func spawnUserNamespaceCat(nsPath string, path string) ([]byte, error) {
rdr, wtr, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("create pipe for userns spawn failed: %w", err)
}
defer rdr.Close()
defer wtr.Close()

errRdr, errWtr, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("create error pipe for userns spawn failed: %w", err)
}
defer errRdr.Close()
defer errWtr.Close()

cNsPath := C.CString(nsPath)
defer C.free(unsafe.Pointer(cNsPath))
cPath := C.CString(path)
defer C.free(unsafe.Pointer(cPath))

childPid := C.spawn_userns_cat(cNsPath, cPath, C.int(wtr.Fd()), C.int(errWtr.Fd()))

if childPid < 0 {
return nil, fmt.Errorf("failed to spawn fork for userns")
} else if childPid == 0 {
// this should never happen
panic("runc executing inside fork child -- unsafe state!")
}

// We are in the parent -- close the write end of the pipe before reading.
wtr.Close()
output, err := io.ReadAll(rdr)
rdr.Close()
if err != nil {
return nil, fmt.Errorf("reading from userns spawn failed: %w", err)
}

// Ditto for the error pipe.
errWtr.Close()
errOutput, err := io.ReadAll(errRdr)
errRdr.Close()
if err != nil {
return nil, fmt.Errorf("reading from userns spawn error pipe failed: %w", err)
}
errOutput = bytes.TrimSpace(errOutput)

// Clean up the child.
child, err := os.FindProcess(int(childPid))
if err != nil {
return nil, fmt.Errorf("could not find userns spawn process: %w", err)
}
state, err := child.Wait()
if err != nil {
return nil, fmt.Errorf("failed to wait for userns spawn process: %w", err)
}
if !state.Success() {
errStr := string(errOutput)
if errStr == "" {
errStr = fmt.Sprintf("unknown error (status code %d)", state.ExitCode())
}
return nil, fmt.Errorf("userns spawn: %s", errStr)
} else if len(errOutput) > 0 {
// We can just ignore weird output in the error pipe if the process
// didn't bail(), but for completeness output for debugging.
logrus.Debugf("userns spawn succeeded but unexpected error message found: %s", string(errOutput))
}
// The subprocess succeeded, return whatever it wrote to the pipe.
return output, nil
}

func GetUserNamespaceMappings(nsPath string) (uidMap, gidMap []configs.IDMap, err error) {
var (
pid int
extra rune
tryFastPath bool
)

// nsPath is usually of the form /proc/<pid>/ns/user, which means that we
// already have a pid that is part of the user namespace and thus we can
// just use the pid to read from /proc/<pid>/*id_map.
//
// Note that Sscanf doesn't consume the whole input, so we check for any
// trailing data with %c. That way, we can be sure the pattern matched
// /proc/$pid/ns/user _exactly_ iff n === 1.
if n, _ := fmt.Sscanf(nsPath, "/proc/%d/ns/user%c", &pid, &extra); n == 1 {
tryFastPath = pid > 0
}

for _, mapType := range []struct {
name string
idMap *[]configs.IDMap
}{
{"uid_map", &uidMap},
{"gid_map", &gidMap},
} {
var mapData []byte

if tryFastPath {
path := fmt.Sprintf("/proc/%d/%s", pid, mapType.name)
data, err := os.ReadFile(path)
if err != nil {
// Do not error out here -- we need to try the slow path if the
// fast path failed.
logrus.Debugf("failed to use fast path to read %s from userns %s (error: %s), falling back to slow userns-join path", mapType.name, nsPath, err)
} else {
mapData = data
}
} else {
logrus.Debugf("cannot use fast path to read %s from userns %s, falling back to slow userns-join path", mapType.name, nsPath)
}

if mapData == nil {
// We have to actually join the namespace if we cannot take the
// fast path. The path is resolved with respect to the child
// process, so just use /proc/self.
data, err := spawnUserNamespaceCat(nsPath, "/proc/self/"+mapType.name)
if err != nil {
return nil, nil, err
}
mapData = data
}
idMap, err := parseIdmapData(mapData)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse %s of userns %s: %w", mapType.name, nsPath, err)
}
*mapType.idMap = idMap
}

return uidMap, gidMap, nil
}
Loading

0 comments on commit 0c8e2cc

Please sign in to comment.