tests/checkpoint.bats: fix test hang/failure

Commit a9e15e7 adds a check that stdin/out/err pipes are restored correctly. Commit ec26065 copy/pastes the same code to one more another test. Problem is (as pointed out in commit 5369f9a) these tests sometimes hang. I have also seen them fail. Apparently, the code used to create pipes and open them to fds is racy: ```shell cat $fifo | cat $fifo & pid=$! exec 50</proc/$pid/fd/0 exec 51>/proc/$pid/fd/0 ``` Since `cat | cat` is spawned asynchronously, by the time exec is used, the second cat process (i.e. $pid) is already fork'ed but it might not be exec'ed yet. As a result, we get this (`ls -l /proc/self/fd`): ``` lr-x------. 1 root root 64 Apr 20 02:39 50 -> /dev/pts/1 l-wx------. 1 root root 64 Apr 20 02:39 51 -> /dev/pts/1 ``` or, in some cases: ``` lr-x------. 1 root root 64 Apr 20 02:45 50 -> /dev/pts/1 l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]' ``` instead of expected set of pipes: ``` > lr-x------. 1 root root 64 Apr 20 02:45 50 -> 'pipe:[215791]' > l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]' ``` One possible workaround is to add `sleep 0.1` or so after cat|cat, but it is outright ugly (besides, we already have one sleep in the test code). The solution is to not use any external processes to create pipes. I admit this still looks not very comprehensible, but at least it is easier than before, and it works. While at it, remove code duplication, moving the setup and check code into a pair of functions. Finally, since the tests are working now, remove the skip. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
opencontainers · Apr 21, 2020 · d5e68ce · d5e68ce
1 parent bf172ef
commit d5e68ce
Showing 1 changed file with 32 additions and 61 deletions.
diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats
@@ -3,9 +3,6 @@
 load helpers
 
 function setup() {
-  if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then
-    skip "CRIU test suite is skipped on systemd cgroup driver for now."
-  fi
   # All checkpoint tests are currently failing on v2
   requires cgroups_v1
   # XXX: currently criu require root containers.
@@ -19,6 +16,33 @@ function teardown() {
   teardown_busybox
 }
 
+function setup_pipes() {
+	# The changes to 'terminal' are needed for running in detached mode
+	sed -i 's;"terminal": true;"terminal": false;' config.json
+	sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json
+
+	# Create two sets of pipes
+	# for stdout/stderr
+	exec 52<> <(:)
+	exec 50</proc/self/fd/52
+	exec 51>/proc/self/fd/52
+	exec 52>&-
+	# ... and stdin
+	exec 62<> <(:)
+	exec 60</proc/self/fd/62
+	exec 61>/proc/self/fd/62
+	exec 62>&-
+}
+
+function check_pipes() {
+	echo Ping >&61
+	exec 61>&-
+	exec 51>&-
+	run cat <&50
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *"ponG Ping"* ]]
+}
+
 @test "checkpoint and restore" {
   runc run -d --console-socket $CONSOLE_SOCKET test_busybox
   [ "$status" -eq 0 ]
@@ -46,29 +70,7 @@ function teardown() {
 }
 
 @test "checkpoint --pre-dump and restore" {
-  # The changes to 'terminal' are needed for running in detached mode
-  sed -i 's;"terminal": true;"terminal": false;' config.json
-  sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json
-
-  # The following code creates pipes for stdin and stdout.
-  # CRIU can't handle fifo-s, so we need all these tricks.
-  fifo=`mktemp -u /tmp/runc-fifo-XXXXXX`
-  mkfifo $fifo
-
-  # stdout
-  cat $fifo | cat $fifo &
-  pid=$!
-  exec 50</proc/$pid/fd/0
-  exec 51>/proc/$pid/fd/0
-
-  # stdin
-  cat $fifo | cat $fifo &
-  pid=$!
-  exec 60</proc/$pid/fd/0
-  exec 61>/proc/$pid/fd/0
-
-  echo -n > $fifo
-  unlink $fifo
+  setup_pipes
 
   # run busybox
   __runc run -d test_busybox <&60 >&51 2>&51
@@ -107,12 +109,7 @@ function teardown() {
   [ "$status" -eq 0 ]
   [[ ${output} == "ok" ]]
 
-  echo Ping >&61
-  exec 61>&-
-  exec 51>&-
-  run cat <&50
-  [ "$status" -eq 0 ]
-  [[ "${output}" == *"ponG Ping"* ]]
+  check_pipes
 }
 
 @test "checkpoint --lazy-pages and restore" {
@@ -122,16 +119,10 @@ function teardown() {
     skip "this criu does not support lazy migration"
   fi
 
-  # The changes to 'terminal' are needed for running in detached mode
-  sed -i 's;"terminal": true;"terminal": false;' config.json
+  setup_pipes
+
   # This should not be necessary: https://github.com/checkpoint-restore/criu/issues/575
   sed -i 's;"readonly": true;"readonly": false;' config.json
-  sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json
-
-  # The following code creates pipes for stdin and stdout.
-  # CRIU can't handle fifo-s, so we need all these tricks.
-  fifo=`mktemp -u /tmp/runc-fifo-XXXXXX`
-  mkfifo $fifo
 
   # For lazy migration we need to know when CRIU is ready to serve
   # the memory pages via TCP.
@@ -141,21 +132,6 @@ function teardown() {
   # TCP port for lazy migration
   port=27277
 
-  # stdout
-  cat $fifo | cat $fifo &
-  pid=$!
-  exec 50</proc/$pid/fd/0
-  exec 51>/proc/$pid/fd/0
-
-  # stdin
-  cat $fifo | cat $fifo &
-  pid=$!
-  exec 60</proc/$pid/fd/0
-  exec 61>/proc/$pid/fd/0
-
-  echo -n > $fifo
-  unlink $fifo
-
   # run busybox
   __runc run -d test_busybox <&60 >&51 2>&51
   [ $? -eq 0 ]
@@ -212,12 +188,7 @@ function teardown() {
   [ "$status" -eq 0 ]
   [[ ${output} == "ok" ]]
 
-  echo Ping >&61
-  exec 61>&-
-  exec 51>&-
-  run cat <&50
-  [ "$status" -eq 0 ]
-  [[ "${output}" == *"ponG Ping"* ]]
+  check_pipes
 }
 
 @test "checkpoint and restore in external network namespace" {