Skip to content

Commit

Permalink
pythongh-104372: Cleanup _posixsubprocess make_inheritable for async signal safety and no GIL requirement (python#104518)
Browse files Browse the repository at this point in the history

Move all of the Python C API calls into the parent process up front
instead of doing PyLong_AsLong and PyErr_Occurred and PyTuple_GET from
the post-fork/vfork child process.

Much of this was long overdue. We shouldn't have been using the PyTuple and
PyLong APIs within these low-level functions anyway.
  • Loading branch information
gpshead authored and JelleZijlstra committed May 18, 2023
1 parent 4fea66d commit 4b2bf16
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 34 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactored the ``_posixsubprocess`` internals to avoid Python C API usage between fork and exec when marking ``pass_fds=`` file descriptors inheritable.
125 changes: 91 additions & 34 deletions Modules/_posixsubprocess.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,16 +160,17 @@ _sanity_check_python_fd_sequence(PyObject *fd_sequence)

/* Is fd found in the sorted Python Sequence? */
static int
_is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
_is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,
Py_ssize_t fd_sequence_len)
{
/* Binary search. */
Py_ssize_t search_min = 0;
Py_ssize_t search_max = PyTuple_GET_SIZE(fd_sequence) - 1;
Py_ssize_t search_max = fd_sequence_len - 1;
if (search_max < 0)
return 0;
do {
long middle = (search_min + search_max) / 2;
long middle_fd = PyLong_AsLong(PyTuple_GET_ITEM(fd_sequence, middle));
long middle_fd = fd_sequence[middle];
if (fd == middle_fd)
return 1;
if (fd > middle_fd)
Expand All @@ -180,24 +181,56 @@ _is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
return 0;
}

/*
* Do all the Python C API calls in the parent process to turn the pass_fds
* "py_fds_to_keep" tuple into a C array. The caller owns allocation and
* freeing of the array.
*
* On error an unknown number of array elements may have been filled in.
* A Python exception has been set when an error is returned.
*
* Returns: -1 on error, 0 on success.
*/
static int
convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)
{
    Py_ssize_t i, len;

    len = PyTuple_GET_SIZE(py_fds_to_keep);
    for (i = 0; i < len; ++i) {
        PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
        long fd = PyLong_AsLong(fdobj);
        /* PyLong_AsLong() returns -1 on error; PyErr_Occurred()
         * disambiguates that from a legitimate value (which the range
         * check below would reject anyway). */
        if (PyErr_Occurred()) {
            return -1;
        }
        if (fd < 0 || fd > INT_MAX) {
            PyErr_SetString(PyExc_ValueError,
                            "fd out of range in fds_to_keep.");
            return -1;
        }
        c_fds_to_keep[i] = (int)fd;
    }
    return 0;
}


/* Mark every fd in c_fds_to_keep (length len) inheritable, except for
 * errpipe_write which must stay close-on-exec so the parent can detect a
 * successful exec() via EOF on the pipe.
 *
 * This function must be async-signal-safe as it is called from child_exec()
 * after fork() or vfork(): no heap allocation, no locks, no Python C API.
 *
 * Returns 0 on success, -1 if marking any fd inheritable failed.
 */
static int
make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)
{
    Py_ssize_t i;

    for (i = 0; i < len; ++i) {
        int fd = c_fds_to_keep[i];
        if (fd == errpipe_write) {
            /* errpipe_write is part of fds_to_keep. It must be closed at
               exec(), but kept open in the child process until exec() is
               called. */
            continue;
        }
        if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)
            return -1;
    }
    return 0;
}
Expand Down Expand Up @@ -233,7 +266,7 @@ safe_get_max_fd(void)


/* Close all file descriptors in the given range except for those in
* py_fds_to_keep by invoking closer on each subrange.
* fds_to_keep by invoking closer on each subrange.
*
* If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
* possible to know for sure what the max fd to go up to is for
Expand All @@ -243,19 +276,18 @@ safe_get_max_fd(void)
/* Close all fds in [start_fd, end_fd] except those in the sorted
 * fds_to_keep array, by invoking closer on each contiguous subrange.
 * Async-signal-safe assuming closer is.
 * Returns 0 on success, -1 as soon as a closer() call fails. */
static int
_close_range_except(int start_fd,
                    int end_fd,
                    int *fds_to_keep,
                    Py_ssize_t fds_to_keep_len,
                    int (*closer)(int, int))
{
    if (end_fd == -1) {
        end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
    }
    Py_ssize_t keep_seq_idx;
    /* As fds_to_keep is sorted we can loop through the list closing
     * fds in between any in the keep list falling within our range. */
    for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {
        int keep_fd = fds_to_keep[keep_seq_idx];
        if (keep_fd < start_fd)
            continue;
        if (closer(start_fd, keep_fd - 1) != 0)
            return -1;
        start_fd = keep_fd + 1;
    }
    if (start_fd <= end_fd) {
        if (closer(start_fd, end_fd) != 0)
            return -1;
    }
    return 0;
}
Expand Down Expand Up @@ -295,7 +327,7 @@ _brute_force_closer(int first, int last)
}

/* Close all open file descriptors in the range from start_fd and higher
* Do not close any in the sorted py_fds_to_keep list.
* Do not close any in the sorted fds_to_keep list.
*
* This version is async signal safe as it does not make any unsafe C library
* calls, malloc calls or handle any locks. It is _unfortunate_ to be forced
Expand All @@ -310,14 +342,16 @@ _brute_force_closer(int first, int last)
* it with some cpp #define magic to work on other OSes as well if you want.
*/
static void
_close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
_close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
{
int fd_dir_fd;

fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
if (fd_dir_fd == -1) {
/* No way to get a list of open fds. */
_close_range_except(start_fd, -1, py_fds_to_keep, _brute_force_closer);
_close_range_except(start_fd, -1,
fds_to_keep, fds_to_keep_len,
_brute_force_closer);
return;
} else {
char buffer[sizeof(struct linux_dirent64)];
Expand All @@ -336,7 +370,8 @@ _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
continue; /* Not a number. */
if (fd != fd_dir_fd && fd >= start_fd &&
!_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
!_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
fds_to_keep_len)) {
close(fd);
}
}
Expand All @@ -357,7 +392,7 @@ _unsafe_closer(int first, int last)
}

/* Close all open file descriptors from start_fd and higher.
* Do not close any in the sorted py_fds_to_keep tuple.
* Do not close any in the sorted fds_to_keep tuple.
*
* This function violates the strict use of async signal safe functions. :(
* It calls opendir(), readdir() and closedir(). Of these, the one most
Expand All @@ -370,11 +405,13 @@ _unsafe_closer(int first, int last)
* http://womble.decadent.org.uk/readdir_r-advisory.html
*/
static void
_close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
_close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,
Py_ssize_t fds_to_keep_len)
{
DIR *proc_fd_dir;
#ifndef HAVE_DIRFD
while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep)) {
while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,
fds_to_keep_len)) {
++start_fd;
}
/* Close our lowest fd before we call opendir so that it is likely to
Expand All @@ -393,7 +430,8 @@ _close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
proc_fd_dir = opendir(FD_DIR);
if (!proc_fd_dir) {
/* No way to get a list of open fds. */
_close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
_close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
_unsafe_closer);
} else {
struct dirent *dir_entry;
#ifdef HAVE_DIRFD
Expand All @@ -407,14 +445,16 @@ _close_open_fds_maybe_unsafe(int start_fd, PyObject* py_fds_to_keep)
if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
continue; /* Not a number. */
if (fd != fd_used_by_opendir && fd >= start_fd &&
!_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
!_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
fds_to_keep_len)) {
close(fd);
}
errno = 0;
}
if (errno) {
/* readdir error, revert behavior. Highly Unlikely. */
_close_range_except(start_fd, -1, py_fds_to_keep, _unsafe_closer);
_close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
_unsafe_closer);
}
closedir(proc_fd_dir);
}
Expand Down Expand Up @@ -442,16 +482,16 @@ _close_range_closer(int first, int last)
#endif

static void
_close_open_fds(int start_fd, PyObject* py_fds_to_keep)
_close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
{
#ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
if (_close_range_except(
start_fd, INT_MAX, py_fds_to_keep,
start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,
_close_range_closer) == 0) {
return;
}
#endif
_close_open_fds_fallback(start_fd, py_fds_to_keep);
_close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);
}

#ifdef VFORK_USABLE
Expand Down Expand Up @@ -544,7 +584,7 @@ child_exec(char *const exec_array[],
Py_ssize_t extra_group_size, const gid_t *extra_groups,
uid_t uid, int child_umask,
const void *child_sigmask,
PyObject *py_fds_to_keep,
int *fds_to_keep, Py_ssize_t fds_to_keep_len,
PyObject *preexec_fn,
PyObject *preexec_fn_args_tuple)
{
Expand All @@ -554,7 +594,7 @@ child_exec(char *const exec_array[],
/* Buffer large enough to hold a hex integer. We can't malloc. */
char hex_errno[sizeof(saved_errno)*2+1];

if (make_inheritable(py_fds_to_keep, errpipe_write) < 0)
if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
goto error;

/* Close parent's pipe ends. */
Expand Down Expand Up @@ -676,7 +716,7 @@ child_exec(char *const exec_array[],
/* close FDs after executing preexec_fn, which might open FDs */
if (close_fds) {
/* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
_close_open_fds(3, py_fds_to_keep);
_close_open_fds(3, fds_to_keep, fds_to_keep_len);
}

/* This loop matches the Lib/os.py _execvpe()'s PATH search when */
Expand Down Expand Up @@ -750,7 +790,7 @@ do_fork_exec(char *const exec_array[],
Py_ssize_t extra_group_size, const gid_t *extra_groups,
uid_t uid, int child_umask,
const void *child_sigmask,
PyObject *py_fds_to_keep,
int *fds_to_keep, Py_ssize_t fds_to_keep_len,
PyObject *preexec_fn,
PyObject *preexec_fn_args_tuple)
{
Expand Down Expand Up @@ -801,7 +841,8 @@ do_fork_exec(char *const exec_array[],
close_fds, restore_signals, call_setsid, pgid_to_set,
gid, extra_group_size, extra_groups,
uid, child_umask, child_sigmask,
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
fds_to_keep, fds_to_keep_len,
preexec_fn, preexec_fn_args_tuple);
_exit(255);
return 0; /* Dead code to avoid a potential compiler warning. */
}
Expand Down Expand Up @@ -881,6 +922,8 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
Py_ssize_t extra_group_size = 0;
int need_after_fork = 0;
int saved_errno = 0;
int *c_fds_to_keep = NULL;
Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);

PyInterpreterState *interp = PyInterpreterState_Get();
if ((preexec_fn != Py_None) && (interp != PyInterpreterState_Main())) {
Expand Down Expand Up @@ -1031,6 +1074,15 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
#endif /* HAVE_SETREUID */
}

c_fds_to_keep = PyMem_RawMalloc(fds_to_keep_len * sizeof(int));
if (c_fds_to_keep == NULL) {
PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");
goto cleanup;
}
if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {
goto cleanup;
}

/* This must be the last thing done before fork() because we do not
* want to call PyOS_BeforeFork() if there is any chance of another
* error leading to the cleanup: code without calling fork(). */
Expand Down Expand Up @@ -1073,7 +1125,8 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
close_fds, restore_signals, call_setsid, pgid_to_set,
gid, extra_group_size, extra_groups,
uid, child_umask, old_sigmask,
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
c_fds_to_keep, fds_to_keep_len,
preexec_fn, preexec_fn_args_tuple);

/* Parent (original) process */
if (pid == (pid_t)-1) {
Expand Down Expand Up @@ -1103,6 +1156,10 @@ subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
PyOS_AfterFork_Parent();

cleanup:
if (c_fds_to_keep != NULL) {
PyMem_RawFree(c_fds_to_keep);
}

if (saved_errno != 0) {
errno = saved_errno;
/* We can't call this above as PyOS_AfterFork_Parent() calls back
Expand Down

0 comments on commit 4b2bf16

Please sign in to comment.