From 99b03b3141a44522f4d14b9240b06b55c2271556 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Wed, 22 May 2024 13:41:08 +0000 Subject: [PATCH] agent: retry mount on ENOMEM There is a race between firecracker-containerd replacing the stub drive with the actual drive and mounting this drive. When the disk is replaced the kernel will schedule asynchronous work in virtblk_config_changed. In the meantime firecracker-containerd can proceed and already send a mount command to the agent running in the guest. This mount operation will, however, fail because the guest kernel still sees the stub drive with only 512 bytes in size. The resulting error code is an ENOMEM in this case. This commit therefore adds this as a retryable error code to accommodate this situation. The issue can be reproduced when an artificial msleep(1000) is added in virtblk_config_changed_work. This produced the following error: error="failed to get stub drive for task \"test\": failed to mount drive inside vm: failed to mount newly patched drive: rpc error: code = Unknown desc = non-retryable failure mounting drive from \"/dev/vdb\" to \"/container/test/rootfs\": cannot allocate memory" Signed-off-by: Maximilian Heyne --- agent/error.go | 4 ++-- agent/error_test.go | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/agent/error.go b/agent/error.go index b2f31bb5f..ef4bd942d 100644 --- a/agent/error.go +++ b/agent/error.go @@ -18,8 +18,8 @@ import ( ) // isRetryableMountError will check to see if the error passed in is an -// syscall.EINVAL +// syscall.EINVAL or syscall.ENOMEM func isRetryableMountError(err error) bool { errno, ok := err.(syscall.Errno) - return ok && errno == syscall.EINVAL + return ok && (errno == syscall.EINVAL || errno == syscall.ENOMEM) } diff --git a/agent/error_test.go b/agent/error_test.go index 1c5878b08..74ab02a30 100644 --- a/agent/error_test.go +++ b/agent/error_test.go @@ -37,6 +37,11 @@ func TestIsRetryableMountError(t *testing.T) { Error: 
syscall.EINVAL, Expected: true, }, + { + Name: "syscall.Errno ENOMEM case", + Error: syscall.ENOMEM, + Expected: true, + }, { Name: "syscall.Errno ENOENT case", Error: syscall.ENOENT,