diff --git a/changelog.d/5609.bugfix b/changelog.d/5609.bugfix new file mode 100644 index 000000000000..534ee22a1b3f --- /dev/null +++ b/changelog.d/5609.bugfix @@ -0,0 +1 @@ +Fix 'utime went backwards' errors on daemonization. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index d50a9840d4f6..9b5c30f50665 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -93,33 +93,36 @@ def start_reactor( install_dns_limiter(reactor) def run(): - # make sure that we run the reactor with the sentinel log context, - # otherwise other PreserveLoggingContext instances will get confused - # and complain when they see the logcontext arbitrarily swapping - # between the sentinel and `run` logcontexts. - with PreserveLoggingContext(): - logger.info("Running") - - change_resource_limit(soft_file_limit) - if gc_thresholds: - gc.set_threshold(*gc_thresholds) - reactor.run() - - if daemonize: - if print_pidfile: - print(pid_file) - - daemon = Daemonize( - app=appname, - pid=pid_file, - action=run, - auto_close_fds=False, - verbose=True, - logger=logger, - ) - daemon.start() - else: - run() + logger.info("Running") + change_resource_limit(soft_file_limit) + if gc_thresholds: + gc.set_threshold(*gc_thresholds) + reactor.run() + + # make sure that we run the reactor with the sentinel log context, + # otherwise other PreserveLoggingContext instances will get confused + # and complain when they see the logcontext arbitrarily swapping + # between the sentinel and `run` logcontexts. + # + # We also need to drop the logcontext before forking if we're daemonizing, + # otherwise the cputime metrics get confused about the per-thread resource usage + # appearing to go backwards. + with PreserveLoggingContext(): + if daemonize: + if print_pidfile: + print(pid_file) + + daemon = Daemonize( + app=appname, + pid=pid_file, + action=run, + auto_close_fds=False, + verbose=True, + logger=logger, + ) + daemon.start() + else: + run() def quit_with_error(error_string): diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 9e1b53780405..30dfa1d6b2ed 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -52,6 +52,15 @@ def get_thread_resource_usage(): return None +# get an id for the current thread. +# +# threading.get_ident doesn't actually return an OS-level tid, and annoyingly, +# on Linux it actually returns the same value either side of a fork() call. However +# we only fork in one place, so it's not worth the hoop-jumping to get a real tid. +# +get_thread_id = threading.get_ident + + class ContextResourceUsage(object): """Object for tracking the resources used by a log context @@ -225,7 +234,7 @@ def __init__(self, name=None, parent_context=None, request=None): # became active. self.usage_start = None - self.main_thread = threading.current_thread() + self.main_thread = get_thread_id() self.request = None self.tag = "" self.alive = True @@ -318,7 +327,7 @@ def copy_to(self, record): record.request = self.request def start(self): - if threading.current_thread() is not self.main_thread: + if get_thread_id() != self.main_thread: logger.warning("Started logcontext %s on different thread", self) return @@ -328,7 +337,7 @@ def start(self): self.usage_start = get_thread_resource_usage() def stop(self): - if threading.current_thread() is not self.main_thread: + if get_thread_id() != self.main_thread: logger.warning("Stopped logcontext %s on different thread", self) return @@ -355,7 +364,7 @@ def get_resource_usage(self): # If we are on the correct thread and we're currently running then we # can include resource usage so far. - is_main_thread = threading.current_thread() is self.main_thread + is_main_thread = get_thread_id() == self.main_thread if self.alive and self.usage_start and is_main_thread: utime_delta, stime_delta = self._get_cputime() res.ru_utime += utime_delta