From 1f090c0b7def3f206e5b9c612f621b7e083e6ceb Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 26 Aug 2022 08:49:33 -0400 Subject: [PATCH 01/10] Remove configuration options for direct TCP replication. --- docs/upgrade.md | 16 ++++++++++++++ .../configuration/config_documentation.md | 2 -- docs/workers.md | 22 +++++-------------- synapse/app/homeserver.py | 11 ---------- synapse/config/server.py | 1 - 5 files changed, 21 insertions(+), 31 deletions(-) diff --git a/docs/upgrade.md b/docs/upgrade.md index 0ab5bfeaf0ba..dced3aa5b29e 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -89,6 +89,22 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.67.0 + +## Direct TCP replication is no longer supported: migrate to Redis + +Redis support was added in v1.13.0 with it becoming the recommended method in +v1.18.0. It replaced the old direct TCP connections (which was deprecated as of +v1.18.0) to the main process. With Redis, rather than all the workers connecting +to the main process, all the workers and the main process connect to Redis, +which relays replication commands between processes. This can give a significant +cpu saving on the main process and will be a prerequisite for upcoming +performance improvements. + +To migrate to Redis add the [`redis` config](./workers.md#shared-configuration), +and remove the TCP `replication` listener from master and `worker_replication_port` +from worker config. + # Upgrading to v1.66.0 ## Delegation of email validation no longer supported diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 4c59e3dcf2cc..8e01353c9dad 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -431,8 +431,6 @@ Sub-options for each listener include: * `metrics`: (see the docs [here](../../metrics-howto.md)), - * `replication`: (see the docs [here](../../workers.md)). - * `tls`: set to true to enable TLS for this listener. Will use the TLS key/cert specified in tls_private_key_path / tls_certificate_path. * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is diff --git a/docs/workers.md b/docs/workers.md index 6969c424d8cd..73750e7af6a7 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -32,13 +32,8 @@ stream between all configured Synapse processes. Additionally, processes may make HTTP requests to each other, primarily for operations which need to wait for a reply ─ such as sending an event. -Redis support was added in v1.13.0 with it becoming the recommended method in -v1.18.0. It replaced the old direct TCP connections (which is deprecated as of -v1.18.0) to the main process. With Redis, rather than all the workers connecting -to the main process, all the workers and the main process connect to Redis, -which relays replication commands between processes. This can give a significant -cpu saving on the main process and will be a prerequisite for upcoming -performance improvements. +All the workers and tha main process connect to Redis, which relays replication +commands between processes. If Redis support is enabled Synapse will use it as a shared cache, as well as a pub/sub mechanism. @@ -325,7 +320,6 @@ effects of bursts of events from that bridge on events sent by normal users. Additionally, the writing of specific streams (such as events) can be moved off of the main process to a particular worker. -(This is only supported with Redis-based replication.) To enable this, the worker must have a HTTP replication listener configured, have a `worker_name` and be listed in the `instance_map` config. The same worker @@ -618,15 +612,9 @@ which processes handle the various proccessing such as push notifications. ## Migration from old config -There are two main independent changes that have been made: introducing Redis -support and merging apps into `synapse.app.generic_worker`. Both these changes -are backwards compatible and so no changes to the config are required, however -server admins are encouraged to plan to migrate to Redis as the old style direct -TCP replication config is deprecated. - -To migrate to Redis add the `redis` config as above, and optionally remove the -TCP `replication` listener from master and `worker_replication_port` from worker -config. +A main change that has occurred is the merging of worker apps into +`synapse.app.generic_worker`. This change is backwards compatible and so no +changes to the config are required. To migrate apps to use `synapse.app.generic_worker` simply update the `worker_app` option in the worker configs, and where worker are started (e.g. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e57a9260326f..883f2fd2ecd8 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -57,7 +57,6 @@ from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource -from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource from synapse.rest.admin import AdminRestResource from synapse.rest.health import HealthResource @@ -290,16 +289,6 @@ def start_listening(self) -> None: manhole_settings=self.config.server.manhole_settings, manhole_globals={"hs": self}, ) - elif listener.type == "replication": - services = listen_tcp( - listener.bind_addresses, - listener.port, - ReplicationStreamProtocolFactory(self), - ) - for s in services: - self.get_reactor().addSystemEventTrigger( - "before", "shutdown", s.stopListening - ) elif listener.type == "metrics": if not self.config.metrics.enable_metrics: logger.warning( diff --git a/synapse/config/server.py b/synapse/config/server.py index 085fe22c5117..f0fca8c6ef0e 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -165,7 +165,6 @@ def generate_ip_set( "http", "metrics", "manhole", - "replication", } KNOWN_RESOURCES = { From c4427fb822ca4fd263512ca052627116d848a16f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 26 Aug 2022 10:06:04 -0400 Subject: [PATCH 02/10] Remove obsolete Redis config for workers. --- .github/workflows/tests.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 144cb9ffaac2..08b1c55e11e7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -209,7 +209,6 @@ jobs: POSTGRES: ${{ matrix.postgres && 1}} MULTI_POSTGRES: ${{ (matrix.postgres == 'multi-postgres') && 1}} WORKERS: ${{ matrix.workers && 1 }} - REDIS: ${{ matrix.redis && 1 }} BLACKLIST: ${{ matrix.workers && 'synapse-blacklist-with-workers' }} TOP: ${{ github.workspace }} @@ -236,7 +235,6 @@ jobs: - sytest-tag: buster postgres: postgres workers: workers - redis: redis steps: - uses: actions/checkout@v2 From abda58f87e80a465cac835bc42acfb3ae3a8de36 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 26 Aug 2022 11:14:09 -0400 Subject: [PATCH 03/10] Newsfragment --- changelog.d/13647.removal | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13647.removal diff --git a/changelog.d/13647.removal b/changelog.d/13647.removal new file mode 100644 index 000000000000..88f25e28d796 --- /dev/null +++ b/changelog.d/13647.removal @@ -0,0 +1 @@ +Remove the ability to direct TCP replication with workers. Workers now require using Redis. From 53346afd8f33260173e7f1c77e2f9fa97071332b Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 30 Aug 2022 15:07:00 -0400 Subject: [PATCH 04/10] Clarify changelog. Co-authored-by: reivilibre --- changelog.d/13647.removal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/13647.removal b/changelog.d/13647.removal index 88f25e28d796..0190a65dba1e 100644 --- a/changelog.d/13647.removal +++ b/changelog.d/13647.removal @@ -1 +1 @@ -Remove the ability to direct TCP replication with workers. Workers now require using Redis. +Remove the ability to use direct TCP replication with workers. Direct TCP replication was deprecated in Synapse v1.18.0. Workers now require using Redis. From eedb69959b0c11893140bfd55ccc8ac30044ed1f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 30 Aug 2022 15:07:37 -0400 Subject: [PATCH 05/10] Clarify documentation. Co-authored-by: reivilibre --- docs/upgrade.md | 2 +- docs/workers.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/upgrade.md b/docs/upgrade.md index dced3aa5b29e..cabce8713c6c 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -98,7 +98,7 @@ v1.18.0. It replaced the old direct TCP connections (which was deprecated as of v1.18.0) to the main process. With Redis, rather than all the workers connecting to the main process, all the workers and the main process connect to Redis, which relays replication commands between processes. This can give a significant -cpu saving on the main process and will be a prerequisite for upcoming +CPU saving on the main process and is a prerequisite for upcoming performance improvements. To migrate to Redis add the [`redis` config](./workers.md#shared-configuration), diff --git a/docs/workers.md b/docs/workers.md index 73750e7af6a7..2adaec2eb2df 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -32,7 +32,7 @@ stream between all configured Synapse processes. Additionally, processes may make HTTP requests to each other, primarily for operations which need to wait for a reply ─ such as sending an event. -All the workers and tha main process connect to Redis, which relays replication +All the workers and the main process connect to Redis, which relays replication commands between processes. If Redis support is enabled Synapse will use it as a shared cache, as well as a From 398c493d68f71a5b4a75c3839ea559aeeebd51ae Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 30 Aug 2022 15:12:22 -0400 Subject: [PATCH 06/10] Clarify that the HTTP replication resource is still needed. --- docs/upgrade.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/upgrade.md b/docs/upgrade.md index cabce8713c6c..a1898c00b4d0 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -102,8 +102,9 @@ CPU saving on the main process and is a prerequisite for upcoming performance improvements. To migrate to Redis add the [`redis` config](./workers.md#shared-configuration), -and remove the TCP `replication` listener from master and `worker_replication_port` -from worker config. +and remove the TCP `replication` listener from config of the master and +`worker_replication_port` from worker config. Note that a HTTP listener with a +`replication` resource is still required. # Upgrading to v1.66.0 From 76dcd63464564952c2ac25bd30ca6f87e03d8fbd Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Sep 2022 14:48:29 -0400 Subject: [PATCH 07/10] Add error messages explaining that direct TCP was removed. --- synapse/config/server.py | 15 +++++++++++++-- synapse/config/workers.py | 8 +++++--- tests/app/test_openid_listener.py | 4 ++-- tests/test_server.py | 2 +- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index f0fca8c6ef0e..c91df636d9a7 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -36,6 +36,12 @@ logger = logging.Logger(__name__) +DIRECT_TCP_ERROR = """ +Using direct TCP replication for workers is no longer supported. + +Please see https://matrix-org.github.io/synapse/latest/upgrade.html#direct-tcp-replication-is-no-longer-supported-migrate-to-redis +""" + # by default, we attempt to listen on both '::' *and* '0.0.0.0' because some OSes # (Windows, macOS, other BSD/Linux where net.ipv6.bindv6only is set) will only listen # on IPv6 when '::' is set. @@ -514,7 +520,9 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ): raise ConfigError("allowed_avatar_mimetypes must be a list") - self.listeners = [parse_listener_def(x) for x in config.get("listeners", [])] + self.listeners = [ + parse_listener_def(i, x) for i, x in enumerate(config.get("listeners", [])) + ] # no_tls is not really supported any more, but let's grandfather it in # here. @@ -879,9 +887,12 @@ def read_gc_thresholds( ) -def parse_listener_def(listener: Any) -> ListenerConfig: +def parse_listener_def(num: int, listener: Any) -> ListenerConfig: """parse a listener config from the config file""" listener_type = listener["type"] + # Raise a helpful error if direct TCP replication is still configured. + if listener_type == "replication": + raise ConfigError(DIRECT_TCP_ERROR, ("listeners", str(num), "type")) port = listener.get("port") if not isinstance(port, int): diff --git a/synapse/config/workers.py b/synapse/config/workers.py index f2716422b52b..0fb725dd8fc6 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -27,7 +27,7 @@ RoutableShardedWorkerHandlingConfig, ShardedWorkerHandlingConfig, ) -from .server import ListenerConfig, parse_listener_def +from .server import DIRECT_TCP_ERROR, ListenerConfig, parse_listener_def _FEDERATION_SENDER_WITH_SEND_FEDERATION_ENABLED_ERROR = """ The send_federation config option must be disabled in the main @@ -128,7 +128,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.worker_app = None self.worker_listeners = [ - parse_listener_def(x) for x in config.get("worker_listeners", []) + parse_listener_def(i, x) + for i, x in enumerate(config.get("worker_listeners", [])) ] self.worker_daemonize = bool(config.get("worker_daemonize")) self.worker_pid_file = config.get("worker_pid_file") @@ -142,7 +143,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.worker_replication_host = config.get("worker_replication_host", None) # The port on the main synapse for TCP replication - self.worker_replication_port = config.get("worker_replication_port", None) + if "worker_replication_port" in config: + raise ConfigError(DIRECT_TCP_ERROR, ("worker_replication_port",)) # The port on the main synapse for HTTP replication endpoint self.worker_replication_http_port = config.get("worker_replication_http_port") diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py index 264e10108242..c7dae58eb549 100644 --- a/tests/app/test_openid_listener.py +++ b/tests/app/test_openid_listener.py @@ -61,7 +61,7 @@ def test_openid_listener(self, names, expectation): } # Listen with the config - self.hs._listen_http(parse_listener_def(config)) + self.hs._listen_http(parse_listener_def(0, config)) # Grab the resource from the site that was told to listen site = self.reactor.tcpServers[0][1] @@ -109,7 +109,7 @@ def test_openid_listener(self, names, expectation): } # Listen with the config - self.hs._listener_http(self.hs.config, parse_listener_def(config)) + self.hs._listener_http(self.hs.config, parse_listener_def(0, config)) # Grab the resource from the site that was told to listen site = self.reactor.tcpServers[0][1] diff --git a/tests/test_server.py b/tests/test_server.py index 23975d59c30d..7c66448245d0 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -228,7 +228,7 @@ def _make_request(self, method: bytes, path: bytes) -> FakeChannel: site = SynapseSite( "test", "site_tag", - parse_listener_def({"type": "http", "port": 0}), + parse_listener_def(0, {"type": "http", "port": 0}), self.resource, "1.0", max_request_body_size=4096, From cab07cb7f37a8758bd0bfef63b577ce8274be346 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Sep 2022 14:51:51 -0400 Subject: [PATCH 08/10] Remove the (buster, postgres, workers) job which is similar enough to (buster, multi-postgres, workers). --- .github/workflows/tests.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 57e5ac209e7f..fe15caf4a6ce 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -232,10 +232,6 @@ jobs: postgres: multi-postgres workers: workers - - sytest-tag: buster - postgres: postgres - workers: workers - steps: - uses: actions/checkout@v2 - name: Prepare test blacklist From 45e0c19e71b417d3e1a512d80f9302a5f052a5e8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Sep 2022 14:51:00 -0400 Subject: [PATCH 09/10] Remove use of worker_replication_port. --- synapse/replication/tcp/handler.py | 58 +++++++++++------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index e1cbfa50ebd2..0f166d16aa5f 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -35,7 +35,6 @@ from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.replication.tcp.client import DirectTcpReplicationClientFactory from synapse.replication.tcp.commands import ( ClearUserSyncsCommand, Command, @@ -332,46 +331,31 @@ async def _process_command( def start_replication(self, hs: "HomeServer") -> None: """Helper method to start replication.""" - if hs.config.redis.redis_enabled: - from synapse.replication.tcp.redis import ( - RedisDirectTcpReplicationClientFactory, - ) + from synapse.replication.tcp.redis import RedisDirectTcpReplicationClientFactory - # First let's ensure that we have a ReplicationStreamer started. - hs.get_replication_streamer() + # First let's ensure that we have a ReplicationStreamer started. + hs.get_replication_streamer() - # We need two connections to redis, one for the subscription stream and - # one to send commands to (as you can't send further redis commands to a - # connection after SUBSCRIBE is called). + # We need two connections to redis, one for the subscription stream and + # one to send commands to (as you can't send further redis commands to a + # connection after SUBSCRIBE is called). - # First create the connection for sending commands. - outbound_redis_connection = hs.get_outbound_redis_connection() + # First create the connection for sending commands. + outbound_redis_connection = hs.get_outbound_redis_connection() - # Now create the factory/connection for the subscription stream. - self._factory = RedisDirectTcpReplicationClientFactory( - hs, - outbound_redis_connection, - channel_names=self._channels_to_subscribe_to, - ) - hs.get_reactor().connectTCP( - hs.config.redis.redis_host, - hs.config.redis.redis_port, - self._factory, - timeout=30, - bindAddress=None, - ) - else: - client_name = hs.get_instance_name() - self._factory = DirectTcpReplicationClientFactory(hs, client_name, self) - host = hs.config.worker.worker_replication_host - port = hs.config.worker.worker_replication_port - hs.get_reactor().connectTCP( - host, - port, - self._factory, - timeout=30, - bindAddress=None, - ) + # Now create the factory/connection for the subscription stream. + self._factory = RedisDirectTcpReplicationClientFactory( + hs, + outbound_redis_connection, + channel_names=self._channels_to_subscribe_to, + ) + hs.get_reactor().connectTCP( + hs.config.redis.redis_host, + hs.config.redis.redis_port, + self._factory, + timeout=30, + bindAddress=None, + ) def get_streams(self) -> Dict[str, Stream]: """Get a map from stream name to all streams.""" From 4960ac4daf91c94c1f00be3f83c9bbd96b2155cc Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 1 Sep 2022 15:12:04 -0400 Subject: [PATCH 10/10] Remove obsolete option from test config. --- tests/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index d2c6d1e85242..65db4376973c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -135,7 +135,6 @@ def default_config( "enable_registration_captcha": False, "macaroon_secret_key": "not even a little secret", "password_providers": [], - "worker_replication_url": "", "worker_app": None, "block_non_admin_invites": False, "federation_domain_whitelist": None,