Skip to content

Commit

Permalink
seccomp: Add wait_killable semantic to seccomp user notifier
Browse files Browse the repository at this point in the history
The user notifier feature allows for filtering of seccomp notifications in
userspace. While the user notifier is handling the syscall, the notifying
process can be preempted, thus ending the notification. This has become a
growing problem, as Golang has adopted signal based async preemption[1]. In
this, it will preempt every 10ms, thus leaving the supervisor less than
10ms to respond to a given notification. If the syscall require I/O (mount,
connect) on behalf of the process, it can easily take 10ms.

This allows the supervisor to set a flag that moves the process into a
state where it is only killable by terminating signals as opposed to all
signals.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>

[1]: golang/go#24543
  • Loading branch information
sargun authored and intel-lab-lkp committed Feb 20, 2021
1 parent 53a68d0 commit b2cc7b4
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 6 deletions.
10 changes: 10 additions & 0 deletions include/uapi/linux/seccomp.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ struct seccomp_notif_sizes {
__u16 seccomp_data;
};

/*
* Valid flags for struct seccomp_notif
*
* SECCOMP_USER_NOTIF_FLAG_WAIT_KILLABLE
*
* Prevent the notifying process from being interrupted by non-fatal, unmasked
* signals.
*/
#define SECCOMP_USER_NOTIF_FLAG_WAIT_KILLABLE (1UL << 0)

struct seccomp_notif {
__u64 id;
__u32 pid;
Expand Down
35 changes: 29 additions & 6 deletions kernel/seccomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ struct seccomp_knotif {

/* outstanding addfd requests */
struct list_head addfd;

bool wait_killable;
};

/**
Expand Down Expand Up @@ -1082,6 +1084,7 @@ static int seccomp_do_user_notification(int this_syscall,
long ret = 0;
struct seccomp_knotif n = {};
struct seccomp_kaddfd *addfd, *tmp;
bool wait_killable = false;

mutex_lock(&match->notify_lock);
err = -ENOSYS;
Expand All @@ -1103,8 +1106,14 @@ static int seccomp_do_user_notification(int this_syscall,
* This is where we wait for a reply from userspace.
*/
do {
wait_killable = n.state == SECCOMP_NOTIFY_SENT &&
n.wait_killable;

mutex_unlock(&match->notify_lock);
err = wait_for_completion_interruptible(&n.ready);
if (wait_killable)
err = wait_for_completion_killable(&n.ready);
else
err = wait_for_completion_interruptible(&n.ready);
mutex_lock(&match->notify_lock);
if (err != 0)
goto interrupted;
Expand Down Expand Up @@ -1422,14 +1431,16 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
struct seccomp_notif unotif;
ssize_t ret;

ret = copy_from_user(&unotif, buf, sizeof(unotif));
if (ret)
return -EFAULT;

/* Verify that we're not given garbage to keep struct extensible. */
ret = check_zeroed_user(buf, sizeof(unotif));
if (ret < 0)
return ret;
if (!ret)
if (unotif.flags & ~(SECCOMP_USER_NOTIF_FLAG_WAIT_KILLABLE))
return -EINVAL;

memset(&unotif, 0, sizeof(unotif));
if (unotif.id || unotif.pid)
return -EINVAL;

ret = down_interruptible(&filter->notif->request);
if (ret < 0)
Expand Down Expand Up @@ -1457,6 +1468,12 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
unotif.pid = task_pid_vnr(knotif->task);
unotif.data = *(knotif->data);

if (unotif.flags & SECCOMP_USER_NOTIF_FLAG_WAIT_KILLABLE) {
knotif->wait_killable = true;
complete(&knotif->ready);
}


knotif->state = SECCOMP_NOTIFY_SENT;
wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
ret = 0;
Expand All @@ -1475,6 +1492,12 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
mutex_lock(&filter->notify_lock);
knotif = find_notification(filter, unotif.id);
if (knotif) {
/* Reset the waiting state */
if (knotif->wait_killable) {
knotif->wait_killable = false;
complete(&knotif->ready);
}

knotif->state = SECCOMP_NOTIFY_INIT;
up(&filter->notif->request);
}
Expand Down

0 comments on commit b2cc7b4

Please sign in to comment.