Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Reload cache factors from disk on SIGHUP #12673

Merged
merged 17 commits into from
May 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/12673.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal.
6 changes: 6 additions & 0 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,12 @@ retention:
# A cache 'factor' is a multiplier that can be applied to each of
# Synapse's caches in order to increase or decrease the maximum
# number of entries that can be stored.
#
# The configuration for cache factors (caches.global_factor and
# caches.per_cache_factors) can be reloaded while the application is running,
# by sending a SIGHUP signal to the Synapse process. Changes to other parts of
# the caching config will NOT be applied after a SIGHUP is received; a restart
# is necessary.

# The number of events to cache in memory. Not affected by
# caches.global_factor.
Expand Down
17 changes: 17 additions & 0 deletions docs/usage/configuration/config_documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,23 @@ caches:
expire_caches: false
sync_response_cache_duration: 2m
```

### Reloading cache factors

The cache factors (i.e. `caches.global_factor` and `caches.per_cache_factors`) may be reloaded at any time by sending a
[`SIGHUP`](https://en.wikipedia.org/wiki/SIGHUP) signal to Synapse using e.g.

```commandline
kill -HUP [PID_OF_SYNAPSE_PROCESS]
```

If you are running multiple workers, you must individually update the worker
config file and send this signal to each worker process.

If you're using the [example systemd service](https://github.com/matrix-org/synapse/blob/develop/contrib/systemd/matrix-synapse.service)
file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using
`systemctl reload matrix-synapse`.

---
## Database ##
Config options related to database settings.
Expand Down
44 changes: 44 additions & 0 deletions synapse/app/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,12 @@
from twisted.protocols.tls import TLSMemoryBIOFactory
from twisted.python.threadpool import ThreadPool

import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE
from synapse.app import check_bind_error
from synapse.app.phone_stats_home import start_phone_stats_home
from synapse.config import ConfigError
from synapse.config._base import format_config_error
from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import ManholeConfig
from synapse.crypto import context_factory
Expand Down Expand Up @@ -426,6 +429,10 @@ def run_sighup(*args: Any, **kwargs: Any) -> None:
signal.signal(signal.SIGHUP, run_sighup)

register_sighup(refresh_certificate, hs)
register_sighup(reload_cache_config, hs.config)

# Apply the cache config.
hs.config.caches.resize_all_caches()

# Load the certificate from disk.
refresh_certificate(hs)
Expand Down Expand Up @@ -480,6 +487,43 @@ def run_sighup(*args: Any, **kwargs: Any) -> None:
atexit.register(gc.freeze)


def reload_cache_config(config: HomeServerConfig) -> None:
"""Reload cache config from disk and immediately apply it.resize caches accordingly.

If the config is invalid, a `ConfigError` is logged and no changes are made.

Otherwise, this:
- replaces the `caches` section on the given `config` object,
- resizes all caches according to the new cache factors, and

Note that the following cache config keys are read, but not applied:
- event_cache_size: used to set a max_size and _original_max_size on
EventsWorkerStore._get_event_cache when it is created. We'd have to update
the _original_max_size (and maybe
- sync_response_cache_duration: would have to update the timeout_sec attribute on
HomeServer -> SyncHandler -> ResponseCache.
- track_memory_usage. This affects synapse.util.caches.TRACK_MEMORY_USAGE which
influences Synapse's self-reported metrics.

Also, the HTTPConnectionPool in SimpleHTTPClient sets its maxPersistentPerHost
parameter based on the global_factor. This won't be applied on a config reload.
"""
try:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Docstring pwease?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

previous_cache_config = config.reload_config_section("caches")
except ConfigError as e:
logger.warning("Failed to reload cache config")
for f in format_config_error(e):
logger.warning(f)
else:
logger.debug(
"New cache config. Was:\n %s\nNow:\n",
previous_cache_config.__dict__,
config.caches.__dict__,
)
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, so this is safe to update to False, but I'm not 100% sure its safe to update this to True. At the very least the metrics will be wrong for a while, as we won't track memory usage for anything that is already in the cache.

Happy for us to do one of:

  1. Not update this
  2. Only update this to False
  3. Check its safe to update this to True and add a big comment about the fact it will underestimate the cache size for a while

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll go for option 1 and update the docstring.

I'll also update the docs to only say that SIGHUP reloads cache factors.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One more look over please?

config.caches.resize_all_caches()


def setup_sentry(hs: "HomeServer") -> None:
"""Enable sentry integration, if enabled in configuration"""

Expand Down
36 changes: 2 additions & 34 deletions synapse/app/homeserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import logging
import os
import sys
from typing import Dict, Iterable, Iterator, List
from typing import Dict, Iterable, List

from matrix_common.versionstring import get_distribution_version_string

Expand Down Expand Up @@ -45,7 +45,7 @@
redirect_stdio_to_logs,
register_start,
)
from synapse.config._base import ConfigError
from synapse.config._base import ConfigError, format_config_error
from synapse.config.emailconfig import ThreepidBehaviour
from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import ListenerConfig
Expand Down Expand Up @@ -399,38 +399,6 @@ async def start() -> None:
return hs


def format_config_error(e: ConfigError) -> Iterator[str]:
"""
Formats a config error neatly

The idea is to format the immediate error, plus the "causes" of those errors,
hopefully in a way that makes sense to the user. For example:

Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
Failed to parse config for module 'JinjaOidcMappingProvider':
invalid jinja template:
unexpected end of template, expected 'end of print statement'.

Args:
e: the error to be formatted

Returns: An iterator which yields string fragments to be formatted
"""
yield "Error in configuration"

if e.path:
yield " at '%s'" % (".".join(e.path),)

yield ":\n %s" % (e.msg,)

parent_e = e.__cause__
indent = 1
while parent_e:
indent += 1
yield ":\n%s%s" % (" " * indent, str(parent_e))
parent_e = parent_e.__cause__


def run(hs: HomeServer) -> None:
_base.start_reactor(
"synapse-homeserver",
Expand Down
81 changes: 74 additions & 7 deletions synapse/config/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,18 @@

import argparse
import errno
import logging
import os
from collections import OrderedDict
from hashlib import sha256
from textwrap import dedent
from typing import (
Any,
ClassVar,
Collection,
Dict,
Iterable,
Iterator,
List,
MutableMapping,
Optional,
Expand All @@ -40,6 +44,8 @@

from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter

logger = logging.getLogger(__name__)


class ConfigError(Exception):
"""Represents a problem parsing the configuration
Expand All @@ -55,6 +61,38 @@ def __init__(self, msg: str, path: Optional[Iterable[str]] = None):
self.path = path


def format_config_error(e: ConfigError) -> Iterator[str]:
"""
Formats a config error neatly

The idea is to format the immediate error, plus the "causes" of those errors,
hopefully in a way that makes sense to the user. For example:

Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
Failed to parse config for module 'JinjaOidcMappingProvider':
invalid jinja template:
unexpected end of template, expected 'end of print statement'.

Args:
e: the error to be formatted

Returns: An iterator which yields string fragments to be formatted
"""
yield "Error in configuration"

if e.path:
yield " at '%s'" % (".".join(e.path),)

yield ":\n %s" % (e.msg,)

parent_e = e.__cause__
indent = 1
while parent_e:
indent += 1
yield ":\n%s%s" % (" " * indent, str(parent_e))
parent_e = parent_e.__cause__


# We split these messages out to allow packages to override with package
# specific instructions.
MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\
Expand Down Expand Up @@ -119,7 +157,7 @@ class Config:
defined in subclasses.
"""

section: str
section: ClassVar[str]

def __init__(self, root_config: "RootConfig" = None):
self.root = root_config
Expand Down Expand Up @@ -309,9 +347,12 @@ class RootConfig:
class, lower-cased and with "Config" removed.
"""

config_classes = []
config_classes: List[Type[Config]] = []

def __init__(self, config_files: Collection[str] = ()):
# Capture absolute paths here, so we can reload config after we daemonize.
self.config_files = [os.path.abspath(path) for path in config_files]

def __init__(self):
for config_class in self.config_classes:
if config_class.section is None:
raise ValueError("%r requires a section name" % (config_class,))
Expand Down Expand Up @@ -512,12 +553,10 @@ def load_config_with_parser(
object from parser.parse_args(..)`
"""

obj = cls()

config_args = parser.parse_args(argv)

config_files = find_config_files(search_paths=config_args.config_path)

obj = cls(config_files)
if not config_files:
parser.error("Must supply a config file.")

Expand Down Expand Up @@ -627,7 +666,7 @@ def load_or_generate_config(

generate_missing_configs = config_args.generate_missing_configs

obj = cls()
obj = cls(config_files)

if config_args.generate_config:
if config_args.report_stats is None:
Expand Down Expand Up @@ -727,6 +766,34 @@ def generate_missing_files(
) -> None:
self.invoke_all("generate_files", config_dict, config_dir_path)

def reload_config_section(self, section_name: str) -> Config:
"""Reconstruct the given config section, leaving all others unchanged.

This works in three steps:

1. Create a new instance of the relevant `Config` subclass.
2. Call `read_config` on that instance to parse the new config.
3. Replace the existing config instance with the new one.

:raises ValueError: if the given `section` does not exist.
:raises ConfigError: for any other problems reloading config.

:returns: the previous config object, which no longer has a reference to this
RootConfig.
"""
existing_config: Optional[Config] = getattr(self, section_name, None)
if existing_config is None:
raise ValueError(f"Unknown config section '{section_name}'")
logger.info("Reloading config section '%s'", section_name)

new_config_data = read_config_files(self.config_files)
new_config = type(existing_config)(self)
new_config.read_config(new_config_data)
setattr(self, section_name, new_config)

existing_config.root = None
return existing_config


def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]:
"""Read the config files into a dict
Expand Down
15 changes: 14 additions & 1 deletion synapse/config/_base.pyi
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import argparse
from typing import (
Any,
Collection,
Dict,
Iterable,
Iterator,
List,
Literal,
MutableMapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
overload,
)

import jinja2
Expand Down Expand Up @@ -64,6 +68,8 @@ class ConfigError(Exception):
self.msg = msg
self.path = path

def format_config_error(e: ConfigError) -> Iterator[str]: ...

MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str
MISSING_REPORT_STATS_SPIEL: str
MISSING_SERVER_NAME: str
Expand Down Expand Up @@ -117,7 +123,8 @@ class RootConfig:
background_updates: background_updates.BackgroundUpdateConfig

config_classes: List[Type["Config"]] = ...
def __init__(self) -> None: ...
config_files: List[str]
def __init__(self, config_files: Collection[str] = ...) -> None: ...
def invoke_all(
self, func_name: str, *args: Any, **kwargs: Any
) -> MutableMapping[str, Any]: ...
Expand Down Expand Up @@ -157,6 +164,12 @@ class RootConfig:
def generate_missing_files(
self, config_dict: dict, config_dir_path: str
) -> None: ...
@overload
def reload_config_section(
self, section_name: Literal["caches"]
) -> cache.CacheConfig: ...
@overload
def reload_config_section(self, section_name: str) -> Config: ...

class Config:
root: RootConfig
Expand Down
Loading