diff --git a/docs/admins/deployment.rst b/docs/admins/deployment.rst index c00f8fc21..9bb34d607 100644 --- a/docs/admins/deployment.rst +++ b/docs/admins/deployment.rst @@ -245,7 +245,8 @@ Useful options: ``log.logfile_path`` is not set, this also shows all log output in the terminal. -IRRd can be stopped by sending a SIGTERM signal. +IRRd can be stopped by sending a SIGTERM signal. A SIGUSR1 will log a +traceback of all threads in a specific IRRd process. .. _deployment-https: diff --git a/docs/releases/4.3.0.rst b/docs/releases/4.3.0.rst index f6fda3c39..8eea6842b 100644 --- a/docs/releases/4.3.0.rst +++ b/docs/releases/4.3.0.rst @@ -115,3 +115,10 @@ Other dependency versions IRRd now requires Redis 5 or newer. PostgreSQL 11 or newer is strongly recommended before upgrading, as it makes database migrations significantly faster. + + +Debugging info on SIGUSR1 +------------------------- +IRRd processes will now log a traceback of all their threads when +receiving a SIGUSR1 signal. This can be helpful when debugging +hanging workers or other complex issues. 
diff --git a/irrd/daemon/main.py b/irrd/daemon/main.py index 88caf793a..911f46f10 100755 --- a/irrd/daemon/main.py +++ b/irrd/daemon/main.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # flake8: noqa: E402 import argparse -import grp import logging import multiprocessing import os @@ -13,6 +12,7 @@ from typing import Tuple, Optional import daemon +import grp import psutil from daemon.daemon import change_process_owner from pid import PidFile, PidFileError @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) sys.path.append(str(Path(__file__).resolve().parents[2])) -from irrd.utils.process_support import ExceptionLoggingProcess +from irrd.utils.process_support import ExceptionLoggingProcess, set_traceback_handler from irrd.storage.preload import PreloadStoreManager from irrd.server.whois.server import start_whois_server from irrd.server.http.server import run_http_server @@ -99,6 +99,7 @@ def main(): def run_irrd(mirror_frequency: int, config_file_path: str, uid: Optional[int], gid: Optional[int]): terminated = False os.environ[ENV_MAIN_PROCESS_PID] = str(os.getpid()) + set_traceback_handler() whois_process = ExceptionLoggingProcess( target=start_whois_server, diff --git a/irrd/server/http/app.py b/irrd/server/http/app.py index 1a3b580be..6f7078cd3 100644 --- a/irrd/server/http/app.py +++ b/irrd/server/http/app.py @@ -25,8 +25,7 @@ from irrd.server.http.event_stream import EventStreamEndpoint, EventStreamInitialDownloadEndpoint from irrd.storage.database_handler import DatabaseHandler from irrd.storage.preload import Preloader -from irrd.utils.process_support import memory_trim - +from irrd.utils.process_support import memory_trim, set_traceback_handler logger = logging.getLogger(__name__) @@ -46,6 +45,7 @@ async def startup(): is read from the environment. 
def set_traceback_handler():  # pragma: no cover
    """
    Install a SIGUSR1 handler that logs a traceback of all threads.

    The handler is inherited by child processes, so it only needs to be
    installed twice: in the main process, and in the uvicorn app startup.
    The traceback is emitted as a single INFO message on this module's
    logger, prefixed with the PID and process title of the process that
    received the signal.
    """
    def sigusr1_handler(signum, frame):
        # The first parameter is named ``signum`` rather than ``signal`` so
        # that it does not shadow the ``signal`` module inside the handler.
        thread_names = {th.ident: th.name for th in threading.enumerate()}
        code = [f"Traceback follows for all threads of process {os.getpid()} ({getproctitle()}):"]
        for thread_id, stack in sys._current_frames().items():
            # Fall back to an empty name when ``threading`` has no entry
            # for this thread id.
            thread_name = thread_names.get(thread_id, "")
            code.append(f"\n## Thread: {thread_name}({thread_id}) ##\n")
            code += traceback.format_list(traceback.extract_stack(stack))
        logger.info("".join(code))

    signal.signal(signal.SIGUSR1, sigusr1_handler)