From 45ec2ac308fab3bd6d4d4f4d2ffd3e8150201e2c Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Wed, 27 Jul 2022 15:26:42 +0100 Subject: [PATCH 1/5] Add a failing test --- tests/handlers/test_federation.py | 118 +++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index fb06e5e81266..bf79bae3e4b9 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -14,6 +14,7 @@ import logging from typing import cast from unittest import TestCase +from unittest.mock import Mock, patch from twisted.test.proto_helpers import MemoryReactor @@ -22,6 +23,7 @@ from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, make_event_from_dict from synapse.federation.federation_base import event_from_pdu_json +from synapse.federation.federation_client import SendJoinResult from synapse.logging.context import LoggingContext, run_in_background from synapse.rest import admin from synapse.rest.client import login, room @@ -30,7 +32,7 @@ from synapse.util.stringutils import random_string from tests import unittest -from tests.test_utils import event_injection +from tests.test_utils import event_injection, make_awaitable logger = logging.getLogger(__name__) @@ -449,3 +451,117 @@ def test_invalid_nested(self) -> None: }, RoomVersions.V6, ) + + +class PartialJoinTestCase(unittest.FederatingHomeserverTestCase): + def test_failed_partial_join_is_clean(self) -> None: + """ + Tests that, when failing to partial-join a room, we don't get stuck with + a partial-state flag on a room. + """ + + fed_handler = self.hs.get_federation_handler() + fed_client = fed_handler.federation_client + + room_id = "!room:example.com" + membership_event = make_event_from_dict( + { + "room_id": room_id, + "type": "m.room.member", + "sender": "@alice:test", + "state_key": "@alice:test", + "content": {"membership": "join"}, + }, + RoomVersions.V10, + ) + + mock_make_membership_event = Mock( + return_value=make_awaitable( + ( + "example.com", + membership_event, + RoomVersions.V10, + ) + ) + ) + + EVENT_CREATE = make_event_from_dict( + { + "room_id": room_id, + "type": "m.room.create", + "sender": "@kristina:example.com", + "state_key": "", + "event_id": "$e", + "depth": 0, + "content": {"creator": "@kristina:example.com", "room_version": "10"}, + "auth_events": [], + "origin_server_ts": 1, + } + ) + EVENT_CREATOR_MEMBERSHIP = make_event_from_dict( + { + "room_id": room_id, + "type": "m.room.member", + "sender": "@kristina:example.com", + "state_key": "@kristina:example.com", + "content": {"membership": "join"}, + "event_id": "$f", + "depth": 1, + "prev_events": ["$e"], + "auth_events": ["$e"], + "origin_server_ts": 1, + } + ) + EVENT_INVITATION_MEMBERSHIP = make_event_from_dict( + { + "room_id": room_id, + "type": "m.room.member", + "sender": "@kristina:example.com", + "state_key": "@alice:test", + "content": {"membership": "invite"}, + "event_id": "$g:test", + "depth": 2, + "prev_events": ["$f"], + "auth_events": ["$e", "$f"], + "origin_server_ts": 1, + } + ) + mock_send_join = Mock( + return_value=make_awaitable( + SendJoinResult( + membership_event, + "example.com", + state=[ + EVENT_CREATE, + EVENT_CREATOR_MEMBERSHIP, + EVENT_INVITATION_MEMBERSHIP, + ], + auth_chain=[ + EVENT_CREATE, + EVENT_CREATOR_MEMBERSHIP, + EVENT_INVITATION_MEMBERSHIP, + ], + partial_state=True, + servers_in_room=["example.com"], + ) + ) + ) + + with patch.object( + fed_client, "make_membership_event", mock_make_membership_event + ), patch.object(fed_client, "send_join", mock_send_join): + # Join and check that our join event is rejected + join_exc = self.get_failure( + fed_handler.do_invite_join(["example.com"], room_id, "@alice:test", {}), + SynapseError, + ) + self.assertIn("Join event was rejected", str(join_exc)) + + store = self.hs.get_datastores().main + + # Check that we don't have a left-over partial_state entry. + self.assertFalse( + self.get_success(store.is_partial_state_room(room_id)), + f"Stale partial-stated room flag left over for {room_id} after a" + f" failed do_invite_join!", + ) From 9f336873a480f9d18e3720703f64320f71039589 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Wed, 27 Jul 2022 15:27:21 +0100 Subject: [PATCH 2/5] Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/13403.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13403.misc diff --git a/changelog.d/13403.misc b/changelog.d/13403.misc new file mode 100644 index 000000000000..cb7b38153c8d --- /dev/null +++ b/changelog.d/13403.misc @@ -0,0 +1 @@ +Faster Room Joins: don't leave a stuck room partial state flag if the join fails. From 43be962bc760b5b26d5806c36ef30a9919061e3d Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Thu, 28 Jul 2022 15:00:16 +0100 Subject: [PATCH 3/5] Always start the background process as it will clean up failed partial joins too --- synapse/handlers/federation.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3b5eaf515624..1cf6cb32e3cb 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -546,9 +546,9 @@ async def do_invite_join( ) if ret.partial_state: - # TODO(faster_joins): roll this back if we don't manage to start the - # background resync (eg process_remote_join fails) - # https://github.com/matrix-org/synapse/issues/12998 + # Mark the room as having partial state. + # The background process is responsible for unmarking this flag, + # even if the join fails. await self.store.store_partial_state_room(room_id, ret.servers_in_room) try: @@ -574,17 +574,21 @@ async def do_invite_join( room_id, ) raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) - - if ret.partial_state: - # Kick off the process of asynchronously fetching the state for this - # room. - run_as_background_process( - desc="sync_partial_state_room", - func=self._sync_partial_state_room, - initial_destination=origin, - other_destinations=ret.servers_in_room, - room_id=room_id, - ) + finally: + # Always kick off the background process that asynchronously fetches + # state for the room. + # If the join failed, the background process is responsible for + # cleaning up — including unmarking the room as a partial state room. + if ret.partial_state: + # Kick off the process of asynchronously fetching the state for this + # room. + run_as_background_process( + desc="sync_partial_state_room", + func=self._sync_partial_state_room, + initial_destination=origin, + other_destinations=ret.servers_in_room, + room_id=room_id, + ) # We wait here until this instance has seen the events come down # replication (if we're using replication) as the below uses caches. From 3c4cd4f4b81a05a1e74020c63d9bd77d51ab80f1 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Fri, 29 Jul 2022 14:13:40 +0100 Subject: [PATCH 4/5] Don't manually create event IDs like a noob :-) --- tests/handlers/test_federation.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index bf79bae3e4b9..c69795fc4abe 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -491,12 +491,12 @@ def test_failed_partial_join_is_clean(self) -> None: "type": "m.room.create", "sender": "@kristina:example.com", "state_key": "", - "event_id": "$e", "depth": 0, "content": {"creator": "@kristina:example.com", "room_version": "10"}, "auth_events": [], "origin_server_ts": 1, - } + }, + room_version=RoomVersions.V10, ) EVENT_CREATOR_MEMBERSHIP = make_event_from_dict( { @@ -505,12 +505,12 @@ def test_failed_partial_join_is_clean(self) -> None: "sender": "@kristina:example.com", "state_key": "@kristina:example.com", "content": {"membership": "join"}, - "event_id": "$f", "depth": 1, - "prev_events": ["$e"], - "auth_events": ["$e"], + "prev_events": [EVENT_CREATE.event_id], + "auth_events": [EVENT_CREATE.event_id], "origin_server_ts": 1, - } + }, + room_version=RoomVersions.V10, ) EVENT_INVITATION_MEMBERSHIP = make_event_from_dict( { @@ -519,12 +519,15 @@ def test_failed_partial_join_is_clean(self) -> None: "sender": "@kristina:example.com", "state_key": "@alice:test", "content": {"membership": "invite"}, - "event_id": "$g:test", "depth": 2, - "prev_events": ["$f"], - "auth_events": ["$e", "$f"], + "prev_events": [EVENT_CREATOR_MEMBERSHIP.event_id], + "auth_events": [ + EVENT_CREATE.event_id, + EVENT_CREATOR_MEMBERSHIP.event_id, + ], "origin_server_ts": 1, - } + }, + room_version=RoomVersions.V10, ) mock_send_join = Mock( return_value=make_awaitable( From 305028594363ce110ec3ee2943748fe513384af6 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Mon, 1 Aug 2022 11:28:31 +0100 Subject: [PATCH 5/5] Add a comment explaining why the join event is rejected, leading to the error --- tests/handlers/test_federation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index c69795fc4abe..1750e8afa4d3 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -554,6 +554,7 @@ def test_failed_partial_join_is_clean(self) -> None: fed_client, "make_membership_event", mock_make_membership_event ), patch.object(fed_client, "send_join", mock_send_join): # Join and check that our join event is rejected + # (The join event is rejected because it doesn't have any signatures) join_exc = self.get_failure( fed_handler.do_invite_join(["example.com"], room_id, "@alice:test", {}), SynapseError,