From aa1a5c33c0b42a616b7a14014e733554119afe88 Mon Sep 17 00:00:00 2001 From: Christiano Haesbaert Date: Mon, 23 Jan 2023 10:35:00 +0100 Subject: [PATCH] Fix luv signals (issue #400) This makes sure we can process signals in luv the same way we do in uring. As stated in #400 the main issue is that luv's mainloop will restart on EINTR and we will never unwind back to ocaml land, so even though the process got the signal, the runtime will not see it until something else goes on. The trick here is to abuse POSIX thread semantics and shove all signals into one specific systhread by blocking them in the other threads. Additionally, I've fixed a bug in OCaml 5.0 where the systhreads end up starting with all signals blocked: https://github.com/ocaml/ocaml/pull/11880. This PR works even with/without the bug. Danger Zone ~~~~~~~~~~~ This is tricky ! If you're thinking we can do better than pipes, think again ! Unix.sigsuspend doesn't work on multithreaded, we don't have a real Unix.pause (it's implemented on top of sigsuspend !). The semantics for kill(2) to "self" are different than "from outside" when multithreaded, and the runtime doesn't expose pthread_kill(2). That's why on the test we have to fork and signal back to the parent. I know, it's horrible. Co-authored-by: Thomas Leonard --- lib_eio_luv/eio_luv.ml | 34 ++++++++++++++++++++++++++++++++-- tests/signal.md | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 tests/signal.md diff --git a/lib_eio_luv/eio_luv.ml b/lib_eio_luv/eio_luv.ml index 46623a1ba..3d71fec25 100644 --- a/lib_eio_luv/eio_luv.ml +++ b/lib_eio_luv/eio_luv.ml @@ -1186,7 +1186,7 @@ let rec wakeup ~async ~io_queued run_q = Luv.Async.send async |> or_raise | None -> () -let rec run : type a. (_ -> a) -> a = fun main -> +let rec run2 : type a. (_ -> a) -> a = fun main -> let loop = Luv.Loop.init () |> or_raise in let run_q = Lf_queue.create () in let io_queued = ref false in @@ -1198,7 +1198,7 @@ let rec run : type a. (_ -> a) -> a = fun main -> Luv.Loop.stop loop ) |> or_raise in let st = { loop; async; run_q; fd_map = Fd_map.empty } in - let stdenv = stdenv ~run_event_loop:run in + let stdenv = stdenv ~run_event_loop:run2 in let rec fork ~new_fiber:fiber fn = Ctf.note_switch (Fiber_context.tid fiber); let open Effect.Deep in @@ -1304,3 +1304,33 @@ let rec run : type a. (_ -> a) -> a = fun main -> | `Done v -> v | `Ex (ex, bt) -> Printexc.raise_with_backtrace ex bt | `Running -> failwith "Deadlock detected: no events scheduled but main function hasn't returned" + +let start_signal_thread () = + let all = List.init 64 (fun x -> x) in + let omask = Thread.sigmask SIG_SETMASK all in + let inp, outp = Unix.pipe ~cloexec:true () in + let tid = + Thread.create + (fun () -> + Thread.sigmask SIG_SETMASK [] |> ignore; + let bytes = Bytes.create 1 in + let rec loop () = + match Unix.read inp bytes 0 1 with + | exception Unix.Unix_error (Unix.EINTR, _, _) -> loop () + | 0 -> Unix.close inp + | _ -> failwith "signal pipe didn't return EOF or EINTR" + in + loop () + ) () + in + tid, omask, outp + +let stop_signal_thread (tid, omask, outp) = + Unix.close outp; + Thread.join tid; + Unix.sigprocmask SIG_SETMASK omask |> ignore + +let run main = + let sigctx = start_signal_thread () in + Fun.protect (fun () -> run2 main) + ~finally:(fun () -> stop_signal_thread sigctx) diff --git a/tests/signal.md b/tests/signal.md new file mode 100644 index 000000000..b06136907 --- /dev/null +++ b/tests/signal.md @@ -0,0 +1,39 @@ +# Setting up the environment + +```ocaml +# #require "eio_main";; +# open Eio.Std;; +``` + +# Test cases + +Prove we can catch sigint: +```ocaml +# Eio_main.run @@ fun _stdenv -> + let interrupted = Eio.Condition.create () in + let old = Sys.signal Sys.sigint + (Signal_handle (fun num -> if num = Sys.sigint then Eio.Condition.broadcast interrupted)) + in + Fiber.both + (fun () -> + Eio.Condition.await_no_mutex interrupted; + traceln "interrupted!"; + ) + (fun () -> + let ppid = Unix.getpid () in + match Unix.fork () with + | 0 -> + Unix.kill ppid Sys.sigint; + Unix._exit 0 + | child_pid -> + let wait () = + let pid, status = Unix.waitpid [] child_pid in + assert (pid = child_pid); + assert (status = (Unix.WEXITED 0)) + in + try wait () with Unix.Unix_error (Unix.EINTR, _, _) -> wait () + ); + Sys.set_signal Sys.sigint old;; ++interrupted! +- : unit = () +```