diff --git a/checkpoint.go b/checkpoint.go index b8bfa045d6c..a8a27f248bc 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -38,7 +38,7 @@ checkpointed.`, cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"}, cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"}, cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"}, - cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"}, + cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: soft|full|strict|ignore (default: soft)"}, cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"}, cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"}, }, @@ -67,17 +67,6 @@ checkpointed.`, return err } - // these are the mandatory criu options for a container - if err := setPageServer(context, options); err != nil { - return err - } - if err := setManageCgroupsMode(context, options); err != nil { - return err - } - if err := setEmptyNsMask(context, options); err != nil { - return err - } - err = container.Checkpoint(options) if err == nil && !(options.LeaveRunning || options.PreDump) { // Destroy the container unless we tell CRIU to keep it. @@ -119,59 +108,80 @@ func prepareImagePaths(context *cli.Context) (string, string, error) { return imagePath, parentPath, nil } -func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) error { - // xxx following criu opts are optional - // The dump image can be sent to a criu page server +func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) { + imagePath, parentPath, err := prepareImagePaths(context) + if err != nil { + return nil, err + } + + opts := &libcontainer.CriuOpts{ + ImagesDirectory: imagePath, + WorkDirectory: context.String("work-path"), + ParentImage: parentPath, + LeaveRunning: context.Bool("leave-running"), + TcpEstablished: context.Bool("tcp-established"), + ExternalUnixConnections: context.Bool("ext-unix-sk"), + ShellJob: context.Bool("shell-job"), + FileLocks: context.Bool("file-locks"), + PreDump: context.Bool("pre-dump"), + AutoDedup: context.Bool("auto-dedup"), + LazyPages: context.Bool("lazy-pages"), + StatusFd: context.Int("status-fd"), + LsmProfile: context.String("lsm-profile"), + LsmMountContext: context.String("lsm-mount-context"), + } + + // CRIU options below may or may not be set. + if psOpt := context.String("page-server"); psOpt != "" { address, port, err := net.SplitHostPort(psOpt) if err != nil || address == "" || port == "" { - return errors.New("Use --page-server ADDRESS:PORT to specify page server") + return nil, errors.New("Use --page-server ADDRESS:PORT to specify page server") } portInt, err := strconv.Atoi(port) if err != nil { - return errors.New("Invalid port number") + return nil, errors.New("Invalid port number") } - options.PageServer = libcontainer.CriuPageServerInfo{ + opts.PageServer = libcontainer.CriuPageServerInfo{ Address: address, Port: int32(portInt), } } - return nil -} -func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) error { - if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" { - switch cgOpt { - case "soft": - options.ManageCgroupsMode = criu.CriuCgMode_SOFT - case "full": - options.ManageCgroupsMode = criu.CriuCgMode_FULL - case "strict": - options.ManageCgroupsMode = criu.CriuCgMode_STRICT - default: - return errors.New("Invalid manage cgroups mode") - } + switch context.String("manage-cgroups-mode") { + case "": + // do nothing + case "soft": + opts.ManageCgroupsMode = criu.CriuCgMode_SOFT + case "full": + opts.ManageCgroupsMode = criu.CriuCgMode_FULL + case "strict": + opts.ManageCgroupsMode = criu.CriuCgMode_STRICT + case "ignore": + opts.ManageCgroupsMode = criu.CriuCgMode_IGNORE + default: + return nil, errors.New("Invalid manage-cgroups-mode value") } - return nil -} -var namespaceMapping = map[specs.LinuxNamespaceType]int{ - specs.NetworkNamespace: unix.CLONE_NEWNET, -} - -func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error { - /* Runc doesn't manage network devices and their configuration */ + // runc doesn't manage network devices and their configuration. nsmask := unix.CLONE_NEWNET - for _, ns := range context.StringSlice("empty-ns") { - f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)] - if !exists { - return fmt.Errorf("namespace %q is not supported", ns) + if context.IsSet("empty-ns") { + namespaceMapping := map[specs.LinuxNamespaceType]int{ + specs.NetworkNamespace: unix.CLONE_NEWNET, + } + + for _, ns := range context.StringSlice("empty-ns") { + f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)] + if !exists { + return nil, fmt.Errorf("namespace %q is not supported", ns) + } + nsmask |= f } - nsmask |= f } - options.EmptyNs = uint32(nsmask) - return nil + opts.EmptyNs = uint32(nsmask) + + return opts, nil } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index c099e458aed..b498e33bce2 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1560,11 +1560,8 @@ func (c *Container) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { return err } - if cgroups.IsCgroup2UnifiedMode() { - return nil - } - // the stuff below is cgroupv1-specific - + // TODO(@kolyshkin): should we use c.cgroupManager.GetPaths() + // instead of reading /proc/pid/cgroup? path := fmt.Sprintf("/proc/%d/cgroup", pid) cgroupsPaths, err := cgroups.ParseCgroupFile(path) if err != nil { diff --git a/man/runc-checkpoint.8.md b/man/runc-checkpoint.8.md index 373259d4ccf..a7dad29d3b3 100644 --- a/man/runc-checkpoint.8.md +++ b/man/runc-checkpoint.8.md @@ -57,7 +57,7 @@ together with **criu lazy-pages**. See : Do a pre-dump, i.e. dump container's memory information only, leaving the container running. See [criu iterative migration](https://criu.org/Iterative_migration). -**--manage-cgroups-mode** **soft**|**full**|**strict**. +**--manage-cgroups-mode** **soft**|**full**|**strict**|**ignore**. : Cgroups mode. Default is **soft**. See [criu --manage-cgroups option](https://criu.org/CLI/opt/--manage-cgroups). diff --git a/man/runc-restore.8.md b/man/runc-restore.8.md index a2b3da6c6fa..eab50db9717 100644 --- a/man/runc-restore.8.md +++ b/man/runc-restore.8.md @@ -37,10 +37,15 @@ image files directory. : Allow checkpoint/restore of file locks. See [criu --file-locks option](https://criu.org/CLI/opt/--file-locks). -**--manage-cgroups-mode** **soft**|**full**|**strict**. +**--manage-cgroups-mode** **soft**|**full**|**strict**|**ignore**. : Cgroups mode. Default is **soft**. See [criu --manage-cgroups option](https://criu.org/CLI/opt/--manage-cgroups). +: In particular, to restore the container into a different cgroup, +**--manage-cgroups-mode ignore** must be used during both +**checkpoint** and **restore**, and the _container_id_ (or +**cgroupsPath** property in OCI config, if set) must be changed. + **--bundle**|**-b** _path_ : Path to the root of the bundle directory. Default is current directory. diff --git a/restore.go b/restore.go index ccd1b232bc9..d65afcfc788 100644 --- a/restore.go +++ b/restore.go @@ -3,7 +3,6 @@ package main import ( "os" - "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/userns" "github.com/sirupsen/logrus" "github.com/urfave/cli" @@ -53,7 +52,7 @@ using the runc checkpoint command.`, cli.StringFlag{ Name: "manage-cgroups-mode", Value: "", - Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'", + Usage: "cgroups mode: soft|full|strict|ignore (default: soft)", }, cli.StringFlag{ Name: "bundle, b", @@ -113,9 +112,6 @@ using the runc checkpoint command.`, if err != nil { return err } - if err := setEmptyNsMask(context, options); err != nil { - return err - } status, err := startContainer(context, CT_ACT_RESTORE, options) if err != nil { return err @@ -126,27 +122,3 @@ using the runc checkpoint command.`, return nil }, } - -func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) { - imagePath, parentPath, err := prepareImagePaths(context) - if err != nil { - return nil, err - } - - return &libcontainer.CriuOpts{ - ImagesDirectory: imagePath, - WorkDirectory: context.String("work-path"), - ParentImage: parentPath, - LeaveRunning: context.Bool("leave-running"), - TcpEstablished: context.Bool("tcp-established"), - ExternalUnixConnections: context.Bool("ext-unix-sk"), - ShellJob: context.Bool("shell-job"), - FileLocks: context.Bool("file-locks"), - PreDump: context.Bool("pre-dump"), - AutoDedup: context.Bool("auto-dedup"), - LazyPages: context.Bool("lazy-pages"), - StatusFd: context.Int("status-fd"), - LsmProfile: context.String("lsm-profile"), - LsmMountContext: context.String("lsm-mount-context"), - }, nil -} diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 615ef8cb65f..0a8e58a2c9c 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -224,7 +224,14 @@ function simple_cr() { # TCP port for lazy migration port=27277 - __runc checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_w} --work-path ./work-dir --image-path ./image-dir test_busybox & + __runc checkpoint \ + --lazy-pages \ + --page-server 0.0.0.0:${port} \ + --status-fd ${lazy_w} \ + --manage-cgroups-mode=ignore \ + --work-path ./work-dir \ + --image-path ./image-dir \ + test_busybox & cpt_pid=$! # wait for lazy page server to be ready @@ -246,14 +253,18 @@ function simple_cr() { lp_pid=$! # Restore lazily from checkpoint. - # The restored container needs a different name (as well as systemd - # unit name, in case systemd cgroup driver is used) as the checkpointed - # container is not yet destroyed. It is only destroyed at that point - # in time when the last page is lazily transferred to the destination. + # + # The restored container needs a different name and a different cgroup + # (and a different systemd unit name, in case systemd cgroup driver is + # used) as the checkpointed container is not yet destroyed. It is only + # destroyed at that point in time when the last page is lazily + # transferred to the destination. + # # Killing the CRIU on the checkpoint side will let the container # continue to run if the migration failed at some point. - [ -v RUNC_USE_SYSTEMD ] && set_cgroups_path - runc_restore_with_pipes ./image-dir test_busybox_restore --lazy-pages + runc_restore_with_pipes ./image-dir test_busybox_restore \ + --lazy-pages \ + --manage-cgroups-mode=ignore wait $cpt_pid @@ -405,3 +416,44 @@ function simple_cr() { # busybox should be back up and running testcontainer test_busybox running } + +@test "checkpoint then restore into a different cgroup (via --manage-cgroups-mode ignore)" { + set_resources_limit + set_cgroups_path + runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox + [ "$status" -eq 0 ] + testcontainer test_busybox running + + local orig_path + orig_path=$(get_cgroup_path "pids") + # Check that the cgroup exists. + test -d "$orig_path" + + runc checkpoint --work-path ./work-dir --manage-cgroups-mode ignore test_busybox + grep -B 5 Error ./work-dir/dump.log || true + [ "$status" -eq 0 ] + testcontainer test_busybox checkpointed + # Check that the cgroup is gone. + ! test -d "$orig_path" + + # Restore into a different cgroup. + set_cgroups_path # Changes the path. + runc restore -d --manage-cgroups-mode ignore --pid-file pid \ + --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox + grep -B 5 Error ./work-dir/restore.log || true + [ "$status" -eq 0 ] + testcontainer test_busybox running + + # Check that the old cgroup path doesn't exist. + ! test -d "$orig_path" + + # Check that the new path exists. + local new_path + new_path=$(get_cgroup_path "pids") + test -d "$new_path" + + # Check that container's init is in the new cgroup. + local pid + pid=$(cat "pid") + grep -q "${REL_CGROUPS_PATH}$" "/proc/$pid/cgroup" +} diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index a9b70ffd887..f07996450b7 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -232,19 +232,27 @@ function set_cgroups_path() { update_config '.linux.cgroupsPath |= "'"${OCI_CGROUPS_PATH}"'"' } -# Get a value from a cgroup file. -function get_cgroup_value() { - local source=$1 - local cgroup var current - +# Get a path to cgroup directory, based on controller name. +# Parameters: +# $1: controller name (like "pids") or a file name (like "pids.max"). +function get_cgroup_path() { if [ -v CGROUP_V2 ]; then - cgroup=$CGROUP_PATH - else - var=${source%%.*} # controller name (e.g. memory) - var=CGROUP_${var^^}_BASE_PATH # variable name (e.g. CGROUP_MEMORY_BASE_PATH) - eval cgroup=\$"${var}${REL_CGROUPS_PATH}" + echo "$CGROUP_PATH" + return fi - cat "$cgroup/$source" + + local var cgroup + var=${1%%.*} # controller name (e.g. memory) + var=CGROUP_${var^^}_BASE_PATH # variable name (e.g. CGROUP_MEMORY_BASE_PATH) + eval cgroup=\$"${var}${REL_CGROUPS_PATH}" + echo "$cgroup" +} + +# Get a value from a cgroup file. +function get_cgroup_value() { + local cgroup + cgroup="$(get_cgroup_path "$1")" + cat "$cgroup/$1" } # Helper to check a if value in a cgroup file matches the expected one.