From f6461a9eb941665700dd1a681a568bb0b429ee08 Mon Sep 17 00:00:00 2001 From: Will Martin Date: Mon, 22 Jan 2018 17:03:02 +0000 Subject: [PATCH 1/7] Avoid race when opening exec fifo When starting a container with `runc start` or `runc run`, the stub process (runc[2:INIT]) opens a fifo for writing. Its parent runc process will open the same fifo for reading. In this way, they synchronize. If the stub process exits at the wrong time, the parent runc process will block forever. This can happen when racing 2 runc operations against each other: `runc run/start`, and `runc delete`. It could also happen for other reasons, e.g. the kernel's OOM killer may select the stub process. This commit resolves this race by racing the opening of the exec fifo from the runc parent process against the stub process exiting. If the stub process exits before we open the fifo, we return an error. Another solution is to wait on the stub process. However, it seems it would require more refactoring to avoid calling wait multiple times on the same process, which is an error. Signed-off-by: Craig Furman (cherry picked from commit 8d3e6c9826815cf6f75dbd56c7b5a23fb5666108) Signed-off-by: Andrew Hsu --- libcontainer/container_linux.go | 69 ++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 173a3f0ffaa..22373e6f1c8 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -5,6 +5,7 @@ package libcontainer import ( "bytes" "encoding/json" + "errors" "fmt" "io" "io/ioutil" @@ -233,20 +234,70 @@ func (c *linuxContainer) Exec() error { func (c *linuxContainer) exec() error { path := filepath.Join(c.root, execFifoFilename) - f, err := os.OpenFile(path, os.O_RDONLY, 0) - if err != nil { - return newSystemErrorWithCause(err, "open exec fifo for reading") + + fifoOpen := make(chan struct{}) + select { + case <-awaitProcessExit(c.initProcess.pid(), fifoOpen): + return errors.New("container process is already dead") + case result := <-awaitFifoOpen(path): + close(fifoOpen) + if result.err != nil { + return result.err + } + f := result.file + defer f.Close() + if err := readFromExecFifo(f); err != nil { + return err + } + return os.Remove(path) } - defer f.Close() - data, err := ioutil.ReadAll(f) +} + +func readFromExecFifo(execFifo io.Reader) error { + data, err := ioutil.ReadAll(execFifo) if err != nil { return err } - if len(data) > 0 { - os.Remove(path) - return nil + if len(data) <= 0 { + return fmt.Errorf("cannot start an already running container") } - return fmt.Errorf("cannot start an already running container") + return nil +} + +func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} { + isDead := make(chan struct{}) + go func() { + for { + select { + case <-exit: + return + case <-time.After(time.Millisecond * 100): + stat, err := system.Stat(pid) + if err != nil || stat.State == system.Zombie { + close(isDead) + return + } + } + } + }() + return isDead +} + +func awaitFifoOpen(path string) <-chan openResult { + fifoOpened := make(chan openResult) + go func() { + f, err := os.OpenFile(path, os.O_RDONLY, 0) + if err != nil { + fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")} + } + fifoOpened <- openResult{file: f} + }() + return fifoOpened +} + +type openResult struct { + file *os.File + err error } func (c *linuxContainer) start(process *Process, isInit bool) error { From 0aa398022730a49fd3ec1b32ec67556ed0849286 Mon Sep 17 00:00:00 2001 From: Ed King Date: Tue, 23 Jan 2018 10:46:31 +0000 Subject: [PATCH 2/7] Return from goroutine when it should terminate Signed-off-by: Craig Furman (cherry picked from commit 5c0af14bf8925b845d91cb94fc1d5ab18140a81a) Signed-off-by: Andrew Hsu --- libcontainer/container_linux.go | 1 + 1 file changed, 1 insertion(+) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 22373e6f1c8..066a38db9b1 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -289,6 +289,7 @@ func awaitFifoOpen(path string) <-chan openResult { f, err := os.OpenFile(path, os.O_RDONLY, 0) if err != nil { fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")} + return } fifoOpened <- openResult{file: f} }() From ced7994a0ffa52be5782e5e82ced39de1694023a Mon Sep 17 00:00:00 2001 From: Andrew Hsu Date: Wed, 31 Jan 2018 18:30:03 -0800 Subject: [PATCH 3/7] Do not run shfmt on travis Signed-off-by: Tibor Vass --- .travis.yml | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 244c6439a27..e025f0e5443 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ before_install: - sudo apt-get install -y libseccomp-dev libapparmor-dev - go get -u github.com/golang/lint/golint - go get -u github.com/vbatts/git-validation - - go get -u github.com/mvdan/sh/cmd/shfmt +# - go get -u mvdan.cc/sh/cmd/shfmt # disabled because Dockerfile uses older version of shfmt - env | grep TRAVIS_ script: diff --git a/Makefile b/Makefile index d6d337dab5c..45d70f97dd9 100644 --- a/Makefile +++ b/Makefile @@ -133,7 +133,7 @@ clean: validate: script/validate-gofmt - script/validate-shfmt + [[ -z "$(TRAVIS)" ]] && script/validate-shfmt || true go vet $(allpackages) ci: validate localtest From af29c4190330722c1605d69623e1d3deb28c0c4a Mon Sep 17 00:00:00 2001 From: Andrew Hsu Date: Fri, 2 Feb 2018 00:36:16 +0000 Subject: [PATCH 4/7] Fix compile errors and port TestParseState Signed-off-by: Andrew Hsu Signed-off-by: Tibor Vass --- libcontainer/container_linux.go | 43 ++++++++++++++++++++++++++-- libcontainer/container_linux_test.go | 18 ++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 066a38db9b1..fac107727d3 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -13,6 +13,7 @@ import ( "os/exec" "path/filepath" "reflect" + "strconv" "strings" "sync" "syscall" @@ -272,8 +273,8 @@ func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} { case <-exit: return case <-time.After(time.Millisecond * 100): - stat, err := system.Stat(pid) - if err != nil || stat.State == system.Zombie { + // if process became zombie, signal death + if state, err := pidState(pid); err != nil || state == 'Z' { close(isDead) return } @@ -1621,3 +1622,41 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na return bytes.NewReader(r.Serialize()), nil } + +// pidState returns the state code as specified in /proc/$pid/stat +// This function was created only to backport https://github.com/opencontainers/runc/pull/1698 +// without having to also backport https://github.com/opencontainers/runc/pull/1489 +func pidState(pid int) (int, error) { + bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + if err != nil { + return 0, err + } + + return parseState(string(bytes)) +} + +// parseState parses the state code from a string in the format of /proc/$pid/stat +// This function was created only to backport https://github.com/opencontainers/runc/pull/1698 +// without having to also backport https://github.com/opencontainers/runc/pull/1489 +func parseState(data string) (int, error) { + // From proc(5), field 2 could contain space and is inside `(` and `)`. + // The following is an example: + // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + i := strings.LastIndex(data, ")") + if i <= 2 || i >= len(data)-1 { + return 0, fmt.Errorf("invalid stat data: %q", data) + } + + parts := strings.SplitN(data[:i], "(", 2) + if len(parts) != 2 { + return 0, fmt.Errorf("invalid stat data: %q", data) + } + + // parts indexes should be offset by 3 from the field number given + // proc(5), because parts is zero-indexed and we've removed fields + // one (PID) and two (Name) in the paren-split. + parts = strings.SplitN(data[i+2:], " ", 2) + var state int + fmt.Sscanf(parts[3-3], "%c", &state) + return state, nil +} diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go index b7ce552ef02..68b45bfea7f 100644 --- a/libcontainer/container_linux_test.go +++ b/libcontainer/container_linux_test.go @@ -216,3 +216,21 @@ func TestGetContainerState(t *testing.T) { } } } + +func TestParseState(t *testing.T) { + data := map[string]int{ + "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": 'S', + "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": 'R', + + "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": 'S', + } + for line, expected := range data { + state, err := parseState(line) + if err != nil { + t.Fatal(err) + } + if state != expected { + t.Fatalf("expected state %q but received %q", expected, state) + } + } +} From 2931f19cf11838dc2a34991fa2a7234f9b1e5c35 Mon Sep 17 00:00:00 2001 From: Tibor Vass Date: Mon, 12 Mar 2018 23:57:07 +0000 Subject: [PATCH 5/7] Use go_import_path in travis config Signed-off-by: Tibor Vass --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index e025f0e5443..7260c1e447f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,8 @@ go: - 1.8.x - tip +go_import_path: github.com/opencontainers/runc + matrix: allow_failures: - go: tip From 0aed39c82e655a73f52cd63064265c64c7da0407 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 28 Feb 2018 23:31:10 +0000 Subject: [PATCH 6/7] tests: allow to load kernel modules from a test container CRIU needs to load a few modules to checkpoint/resume containers. https://github.com/opencontainers/runc/issues/1745 Signed-off-by: Andrei Vagin --- Dockerfile | 1 + Makefile | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index fd9be94c098..f9bacc6efe0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,7 @@ RUN apt-get update && apt-get install -y \ protobuf-c-compiler \ protobuf-compiler \ python-minimal \ + kmod \ --no-install-recommends \ && apt-get clean diff --git a/Makefile b/Makefile index 45d70f97dd9..877b9da6025 100644 --- a/Makefile +++ b/Makefile @@ -85,13 +85,13 @@ localtest: make localunittest localintegration localrootlessintegration unittest: runcimage - docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest + docker run -e TESTFLAGS -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest localunittest: all go test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages) integration: runcimage - docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration + docker run -e TESTFLAGS -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration localintegration: all bats -t tests/integration${TESTFLAGS} From f6bb33533da419b184fc5f9d08e5c2a12b4057d8 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 7 Jun 2017 21:08:44 -0700 Subject: [PATCH 7/7] libcontainer/console_linux.go: Make SaneTerminal public And use it only in local tooling that is forwarding the pseudoterminal master. That way runC no longer has an opinion on the onlcr setting for folks who are creating a terminal and detaching. They'll use --console-socket and can setup the pseudoterminal however they like without runC having an opinion. With this commit, the only cases where runC still has applies SaneTerminal is when *it* is the process consuming the master descriptor. Signed-off-by: W. Trevor King (cherry picked from commit 830c0d70df9143d13c7ac3d15614ccde50449b49) Signed-off-by: Tibor Vass --- contrib/cmd/recvtty/recvtty.go | 4 ++++ libcontainer/console_linux.go | 4 ++-- libcontainer/integration/execin_test.go | 1 + tty.go | 3 +++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/contrib/cmd/recvtty/recvtty.go b/contrib/cmd/recvtty/recvtty.go index 177a89359ed..ebb69197c3e 100644 --- a/contrib/cmd/recvtty/recvtty.go +++ b/contrib/cmd/recvtty/recvtty.go @@ -24,6 +24,7 @@ import ( "os" "strings" + "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/utils" "github.com/urfave/cli" ) @@ -100,6 +101,9 @@ func handleSingle(path string) error { if err != nil { return err } + if err = libcontainer.SaneTerminal(master); err != nil { + return err + } // Copy from our stdio to the master fd. quitChan := make(chan struct{}) diff --git a/libcontainer/console_linux.go b/libcontainer/console_linux.go index 5927bdc0fb2..477b5602e76 100644 --- a/libcontainer/console_linux.go +++ b/libcontainer/console_linux.go @@ -130,12 +130,12 @@ func ptsname(f *os.File) (string, error) { return fmt.Sprintf("/dev/pts/%d", n), nil } -// saneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair +// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair // created by us acts normally. In particular, a not-very-well-known default of // Linux unix98 ptys is that they have +onlcr by default. While this isn't a // problem for terminal emulators, because we relay data from the terminal we // also relay that funky line discipline. -func saneTerminal(terminal *os.File) error { +func SaneTerminal(terminal *os.File) error { // Go doesn't have a wrapper for any of the termios ioctls. var termios unix.Termios diff --git a/libcontainer/integration/execin_test.go b/libcontainer/integration/execin_test.go index 019757f643c..366214fad81 100644 --- a/libcontainer/integration/execin_test.go +++ b/libcontainer/integration/execin_test.go @@ -299,6 +299,7 @@ func TestExecInTTY(t *testing.T) { err: err, } } + libcontainer.SaneTerminal(f) dc <- &cdata{ c: libcontainer.ConsoleFromFile(f), } diff --git a/tty.go b/tty.go index 9824df144c5..52e0254a1ca 100644 --- a/tty.go +++ b/tty.go @@ -74,6 +74,9 @@ func (t *tty) recvtty(process *libcontainer.Process, socket *os.File) error { if err != nil { return err } + if err = libcontainer.SaneTerminal(f); err != nil { + return err + } console := libcontainer.ConsoleFromFile(f) go io.Copy(console, os.Stdin) t.wg.Add(1)