Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Faster Room Joins: don't leave a stuck room partial state flag if the join fails. #13403

Merged
merged 5 commits into from
Aug 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13403.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Faster Room Joins: don't leave a stuck room partial state flag if the join fails.
32 changes: 18 additions & 14 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,9 @@ async def do_invite_join(
)

if ret.partial_state:
# TODO(faster_joins): roll this back if we don't manage to start the
# background resync (eg process_remote_join fails)
# https://github.com/matrix-org/synapse/issues/12998
# Mark the room as having partial state.
# The background process is responsible for unmarking this flag,
# even if the join fails.
await self.store.store_partial_state_room(room_id, ret.servers_in_room)

try:
Expand All @@ -574,17 +574,21 @@ async def do_invite_join(
room_id,
)
raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0)

if ret.partial_state:
# Kick off the process of asynchronously fetching the state for this
# room.
run_as_background_process(
desc="sync_partial_state_room",
func=self._sync_partial_state_room,
initial_destination=origin,
other_destinations=ret.servers_in_room,
room_id=room_id,
)
finally:
# Always kick off the background process that asynchronously fetches
# state for the room.
# If the join failed, the background process is responsible for
# cleaning up — including unmarking the room as a partial state room.
if ret.partial_state:
# Kick off the process of asynchronously fetching the state for this
# room.
run_as_background_process(
desc="sync_partial_state_room",
func=self._sync_partial_state_room,
initial_destination=origin,
other_destinations=ret.servers_in_room,
room_id=room_id,
)

# We wait here until this instance has seen the events come down
# replication (if we're using replication) as the below uses caches.
Expand Down
122 changes: 121 additions & 1 deletion tests/handlers/test_federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import logging
from typing import cast
from unittest import TestCase
from unittest.mock import Mock, patch

from twisted.test.proto_helpers import MemoryReactor

Expand All @@ -22,6 +23,7 @@
from synapse.api.room_versions import RoomVersions
from synapse.events import EventBase, make_event_from_dict
from synapse.federation.federation_base import event_from_pdu_json
from synapse.federation.federation_client import SendJoinResult
from synapse.logging.context import LoggingContext, run_in_background
from synapse.rest import admin
from synapse.rest.client import login, room
Expand All @@ -30,7 +32,7 @@
from synapse.util.stringutils import random_string

from tests import unittest
from tests.test_utils import event_injection
from tests.test_utils import event_injection, make_awaitable

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -449,3 +451,121 @@ def test_invalid_nested(self) -> None:
},
RoomVersions.V6,
)


class PartialJoinTestCase(unittest.FederatingHomeserverTestCase):
def test_failed_partial_join_is_clean(self) -> None:
"""
Tests that, when failing to partial-join a room, we don't get stuck with
a partial-state flag on a room.
"""

fed_handler = self.hs.get_federation_handler()
fed_client = fed_handler.federation_client

room_id = "!room:example.com"
membership_event = make_event_from_dict(
{
"room_id": room_id,
"type": "m.room.member",
"sender": "@alice:test",
"state_key": "@alice:test",
"content": {"membership": "join"},
},
RoomVersions.V10,
)
reivilibre marked this conversation as resolved.
Show resolved Hide resolved

mock_make_membership_event = Mock(
return_value=make_awaitable(
(
"example.com",
membership_event,
RoomVersions.V10,
)
)
)

EVENT_CREATE = make_event_from_dict(
{
"room_id": room_id,
"type": "m.room.create",
"sender": "@kristina:example.com",
"state_key": "",
"depth": 0,
"content": {"creator": "@kristina:example.com", "room_version": "10"},
"auth_events": [],
"origin_server_ts": 1,
},
room_version=RoomVersions.V10,
)
EVENT_CREATOR_MEMBERSHIP = make_event_from_dict(
{
"room_id": room_id,
"type": "m.room.member",
"sender": "@kristina:example.com",
"state_key": "@kristina:example.com",
"content": {"membership": "join"},
"depth": 1,
"prev_events": [EVENT_CREATE.event_id],
"auth_events": [EVENT_CREATE.event_id],
"origin_server_ts": 1,
},
room_version=RoomVersions.V10,
)
EVENT_INVITATION_MEMBERSHIP = make_event_from_dict(
{
"room_id": room_id,
"type": "m.room.member",
"sender": "@kristina:example.com",
"state_key": "@alice:test",
"content": {"membership": "invite"},
"depth": 2,
"prev_events": [EVENT_CREATOR_MEMBERSHIP.event_id],
"auth_events": [
EVENT_CREATE.event_id,
EVENT_CREATOR_MEMBERSHIP.event_id,
],
"origin_server_ts": 1,
},
room_version=RoomVersions.V10,
)
mock_send_join = Mock(
return_value=make_awaitable(
SendJoinResult(
membership_event,
"example.com",
state=[
EVENT_CREATE,
EVENT_CREATOR_MEMBERSHIP,
EVENT_INVITATION_MEMBERSHIP,
],
auth_chain=[
EVENT_CREATE,
EVENT_CREATOR_MEMBERSHIP,
EVENT_INVITATION_MEMBERSHIP,
],
partial_state=True,
servers_in_room=["example.com"],
)
)
)

with patch.object(
fed_client, "make_membership_event", mock_make_membership_event
), patch.object(fed_client, "send_join", mock_send_join):
# Join and check that our join event is rejected
# (The join event is rejected because it doesn't have any signatures)
join_exc = self.get_failure(
fed_handler.do_invite_join(["example.com"], room_id, "@alice:test", {}),
SynapseError,
)
self.assertIn("Join event was rejected", str(join_exc))

store = self.hs.get_datastores().main

# Check that we don't have a left-over partial_state entry.
self.assertFalse(
self.get_success(store.is_partial_state_room(room_id)),
f"Stale partial-stated room flag left over for {room_id} after a"
f" failed do_invite_join!",
)