From ad5d75e367ba883f410157b72a7ebae1ee72ecf3 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Wed, 20 Jul 2022 21:09:51 +0100 Subject: [PATCH 01/10] Skip soft fail checks for rooms with partial state When a room has the partial state flag, we may not have an accurate `m.room.member` event for event senders in the room's current state, and so cannot perform soft fail checks correctly. Skip the soft fail check entirely in this case. As an alternative, we could block until we have full state, but that would prevent us from receiving incoming events over federation, which is undesirable. Signed-off-by: Sean Quah --- changelog.d/13354.misc | 1 + synapse/handlers/federation_event.py | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 changelog.d/13354.misc diff --git a/changelog.d/13354.misc b/changelog.d/13354.misc new file mode 100644 index 000000000000..e08ee7866a32 --- /dev/null +++ b/changelog.d/13354.misc @@ -0,0 +1 @@ +Faster room joins: skip soft fail checks while Synapse only has partial room state, since the current membership of event senders may not be accurately known. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index a5f4ce7c8af9..9d9f1696f247 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1664,11 +1664,21 @@ async def _check_for_soft_fail( """Checks if we should soft fail the event; if so, marks the event as such. + Does nothing for events in rooms with partial state, since we may not have an + accurate membership event for the sender in the current state. + Args: event state_ids: The state at the event if we don't have all the event's prev events origin: The host the event originates from. """ + if await self._store.is_partial_state_room(event.room_id): + # We might not know the sender's membership in the current state, so don't + # soft fail anything. Even if we do have a membership for the sender in the + # current state, it may have been derived from state resolution between + # partial and full state and may not be accurate. + return + extrem_ids_list = await self._store.get_latest_event_ids_in_room(event.room_id) extrem_ids = set(extrem_ids_list) prev_event_ids = set(event.prev_event_ids()) From ff8f6cf2ccfb1ddf203a8f41de3402d599938caf Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 8 Jul 2022 15:02:12 +0100 Subject: [PATCH 02/10] Make the `partial_state` parameter to `compute_event_context` stricter Previously, `partial_state=False` either meant "full state" or "calculate the flag yourself", depending on whether `state_ids_before_event` was provided. Split out the latter meaning into `partial_state=None`. Signed-off-by: Sean Quah --- synapse/handlers/federation_event.py | 3 +++ synapse/handlers/message.py | 4 ++++ synapse/state/__init__.py | 18 ++++++++++++------ tests/storage/test_events.py | 7 ++++++- tests/test_state.py | 2 ++ 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 9d9f1696f247..f85ef14c4fe5 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -540,6 +540,8 @@ async def update_state_for_partial_state_event( context = await self._state_handler.compute_event_context( event, state_ids_before_event=state_ids, + # TODO(faster_joins): Calculate the partial_state flag correctly. + partial_state=None if state_ids is None else False, ) if context.partial_state: # this can happen if some or all of the event's prev_events still have @@ -1132,6 +1134,7 @@ async def _process_received_pdu( context = await self._state_handler.compute_event_context( event, state_ids_before_event=state_ids, + partial_state=None if state_ids is None else False, ) try: await self._check_event_auth(origin, event, context) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index bd7baef0510b..e0bcc40b9389 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1135,6 +1135,10 @@ async def create_new_client_event( context = await self.state.compute_event_context( event, state_ids_before_event=state_map_for_event, + # TODO(faster_joins): check how MSC2716 works and whether we can have + # partial state here + # https://github.com/matrix-org/synapse/issues/13003 + partial_state=False, ) else: context = await self.state.compute_event_context(event) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 87ccd52f0ab7..69834de0de7c 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -255,7 +255,7 @@ async def compute_event_context( self, event: EventBase, state_ids_before_event: Optional[StateMap[str]] = None, - partial_state: bool = False, + partial_state: Optional[bool] = None, ) -> EventContext: """Build an EventContext structure for a non-outlier event. @@ -270,8 +270,12 @@ async def compute_event_context( it can't be calculated from existing events. This is normally only specified when receiving an event from federation where we don't have the prev events, e.g. when backfilling. - partial_state: True if `state_ids_before_event` is partial and omits - non-critical membership events + partial_state: + `True` if `state_ids_before_event` is partial and omits non-critical + membership events. + `False` if `state_ids_before_event` is the full state. + `None` when `state_ids_before_event` is not provided. In this case, the + flag will be calculated based on `event`'s prev events. Returns: The event context. """ @@ -298,12 +302,14 @@ async def compute_event_context( ) ) + # the partial_state flag must be provided + assert partial_state is not None else: # otherwise, we'll need to resolve the state across the prev_events. # partial_state should not be set explicitly in this case: # we work it out dynamically - assert not partial_state + assert partial_state is None # if any of the prev-events have partial state, so do we. # (This is slightly racy - the prev-events might get fixed up before we use @@ -313,13 +319,13 @@ async def compute_event_context( incomplete_prev_events = await self.store.get_partial_state_events( prev_event_ids ) - if any(incomplete_prev_events.values()): + partial_state = any(incomplete_prev_events.values()) + if partial_state: logger.debug( "New/incoming event %s refers to prev_events %s with partial state", event.event_id, [k for (k, v) in incomplete_prev_events.items() if v], ) - partial_state = True logger.debug("calling resolve_state_groups from compute_event_context") # we've already taken into account partial state, so no need to wait for diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index 2ff88e64a59c..3ce4f35cb74d 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -70,7 +70,11 @@ def prepare(self, reactor, clock, homeserver): def persist_event(self, event, state=None): """Persist the event, with optional state""" context = self.get_success( - self.state.compute_event_context(event, state_ids_before_event=state) + self.state.compute_event_context( + event, + state_ids_before_event=state, + partial_state=None if state is None else False, + ) ) self.get_success(self._persistence.persist_event(event, context)) @@ -148,6 +152,7 @@ def test_do_not_prune_gap_if_state_different(self): self.state.compute_event_context( remote_event_2, state_ids_before_event=state_before_gap, + partial_state=False, ) ) diff --git a/tests/test_state.py b/tests/test_state.py index bafd6d1750ce..504530b49a8e 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -462,6 +462,7 @@ def test_annotate_with_old_message(self): state_ids_before_event={ (e.type, e.state_key): e.event_id for e in old_state }, + partial_state=False, ) ) @@ -492,6 +493,7 @@ def test_annotate_with_old_state(self): state_ids_before_event={ (e.type, e.state_key): e.event_id for e in old_state }, + partial_state=False, ) ) From f7c9a5a2dad8ceee188be55a8e42788e126139c6 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 8 Jul 2022 15:28:11 +0100 Subject: [PATCH 03/10] Add `partial_state` parameter to `_process_received_pdu` Signed-off-by: Sean Quah --- synapse/handlers/federation_event.py | 28 ++++++++++++++++++++++------ tests/handlers/test_federation.py | 1 + 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f85ef14c4fe5..54868bf541a7 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -278,7 +278,9 @@ async def on_receive_pdu(self, origin: str, pdu: EventBase) -> None: ) try: - await self._process_received_pdu(origin, pdu, state_ids=None) + await self._process_received_pdu( + origin, pdu, state_ids=None, partial_state=None + ) except PartialStateConflictError: # The room was un-partial stated while we were processing the PDU. # Try once more, with full state this time. @@ -286,7 +288,9 @@ async def on_receive_pdu(self, origin: str, pdu: EventBase) -> None: "Room %s was un-partial stated while processing the PDU, trying again.", room_id, ) - await self._process_received_pdu(origin, pdu, state_ids=None) + await self._process_received_pdu( + origin, pdu, state_ids=None, partial_state=None + ) async def on_send_membership_event( self, origin: str, event: EventBase @@ -814,7 +818,11 @@ async def _process_pulled_event( # https://github.com/matrix-org/synapse/issues/13002 await self._process_received_pdu( - origin, event, state_ids=state_ids, backfilled=backfilled + origin, + event, + state_ids=state_ids, + partial_state=False, + backfilled=backfilled, ) except FederationError as e: if e.code == 403: @@ -1096,6 +1104,7 @@ async def _process_received_pdu( origin: str, event: EventBase, state_ids: Optional[StateMap[str]], + partial_state: Optional[bool], backfilled: bool = False, ) -> None: """Called when we have a new non-outlier event. @@ -1119,14 +1128,21 @@ async def _process_received_pdu( state_ids: Normally None, but if we are handling a gap in the graph (ie, we are missing one or more prev_events), the resolved state at the - event. Must not be partial state. + event + + partial_state: + `True` if `state_ids` is partial and omits non-critical membership + events. + `False` if `state_ids` is the full state. + `None` if `state_ids` is not provided. In this case, the flag will be + calculated based on `event`'s prev events. backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) PartialStateConflictError: if the room was un-partial stated in between computing the state at the event and persisting it. The caller should retry - exactly once in this case. Will never be raised if `state_ids` is provided. + exactly once in this case. """ logger.debug("Processing event: %s", event) assert not event.internal_metadata.outlier @@ -1134,7 +1150,7 @@ async def _process_received_pdu( context = await self._state_handler.compute_event_context( event, state_ids_before_event=state_ids, - partial_state=None if state_ids is None else False, + partial_state=partial_state, ) try: await self._check_event_auth(origin, event, context) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 8a0bb91f40ce..fb06e5e81266 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -287,6 +287,7 @@ def test_backfill_with_many_backward_extremities(self) -> None: state_ids={ (e.type, e.state_key): e.event_id for e in current_state }, + partial_state=False, ) ) From bca5a17584d5e346863bd769b9e3e8740d11fda9 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 8 Jul 2022 15:34:48 +0100 Subject: [PATCH 04/10] Add `await_full_state` parameter to `get_state_groups_ids`, so that it can return partial state Signed-off-by: Sean Quah --- synapse/storage/controllers/state.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index e08f956e6ef7..20805c94fa19 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -82,13 +82,15 @@ async def get_state_group_delta( return state_group_delta.prev_group, state_group_delta.delta_ids async def get_state_groups_ids( - self, _room_id: str, event_ids: Collection[str] + self, _room_id: str, event_ids: Collection[str], await_full_state: bool = True ) -> Dict[int, MutableStateMap[str]]: """Get the event IDs of all the state for the state groups for the given events Args: _room_id: id of the room for these events event_ids: ids of the events + await_full_state: if `True`, will block if we do not yet have complete + state at these events. Returns: dict of state_group_id -> (dict of (type, state_key) -> event id) @@ -100,7 +102,9 @@ async def get_state_groups_ids( if not event_ids: return {} - event_to_groups = await self.get_state_group_for_events(event_ids) + event_to_groups = await self.get_state_group_for_events( + event_ids, await_full_state=await_full_state + ) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups(groups) From 1d48d238c1623a12030945fbf6ff4309b99254df Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 8 Jul 2022 15:44:43 +0100 Subject: [PATCH 05/10] Allow `_resolve_state_at_missing_prevs` to return partial state, otherwise we can block when receiving or backfilling events over federation Signed-off-by: Sean Quah --- synapse/handlers/federation_event.py | 50 ++++++++++++++++------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 54868bf541a7..f787afa33b2d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -538,16 +538,20 @@ async def update_state_for_partial_state_event( # # This is the same operation as we do when we receive a regular event # over federation. - state_ids = await self._resolve_state_at_missing_prevs(destination, event) - - # build a new state group for it if need be - context = await self._state_handler.compute_event_context( - event, - state_ids_before_event=state_ids, - # TODO(faster_joins): Calculate the partial_state flag correctly. - partial_state=None if state_ids is None else False, + state_ids, partial_state = await self._resolve_state_at_missing_prevs( + destination, event ) - if context.partial_state: + + context = None + if not partial_state: + # build a new state group for it if need be + context = await self._state_handler.compute_event_context( + event, + state_ids_before_event=state_ids, + partial_state=partial_state, + ) + + if context is None or context.partial_state: # this can happen if some or all of the event's prev_events still have # partial state - ie, an event has an earlier stream_ordering than one # or more of its prev_events, so we de-partial-state it before its @@ -812,16 +816,12 @@ async def _process_pulled_event( return try: - state_ids = await self._resolve_state_at_missing_prevs(origin, event) - # TODO(faster_joins): make sure that _resolve_state_at_missing_prevs does - # not return partial state - # https://github.com/matrix-org/synapse/issues/13002 - + state_ids, partial_state = await self._resolve_state_at_missing_prevs(origin, event) await self._process_received_pdu( origin, event, state_ids=state_ids, - partial_state=False, + partial_state=partial_state, backfilled=backfilled, ) except FederationError as e: @@ -832,7 +832,7 @@ async def _process_pulled_event( async def _resolve_state_at_missing_prevs( self, dest: str, event: EventBase - ) -> Optional[StateMap[str]]: + ) -> Tuple[Optional[StateMap[str]], Optional[bool]]: """Calculate the state at an event with missing prev_events. This is used when we have pulled a batch of events from a remote server, and @@ -859,8 +859,10 @@ async def _resolve_state_at_missing_prevs( event: an event to check for missing prevs. Returns: - if we already had all the prev events, `None`. Otherwise, returns - the event ids of the state at `event`. + if we already had all the prev events, `None, None`. Otherwise, returns a + tuple containing: + * the event ids of the state at `event`. + * a boolean indicating whether the state may be partial. Raises: FederationError if we fail to get the state from the remote server after any @@ -874,7 +876,7 @@ async def _resolve_state_at_missing_prevs( missing_prevs = prevs - seen if not missing_prevs: - return None + return None, None logger.info( "Event %s is missing prev_events %s: calculating state for a " @@ -886,9 +888,15 @@ async def _resolve_state_at_missing_prevs( # resolve them to find the correct state at the current event. try: + # Determine whether we may be about to retrieve partial state + # Events may be un-partial stated right after we compute the partial state + # flag, but that's okay, as long as the flag errs on the conservative side. + partial_state_flags = await self._store.get_partial_state_events(seen) + partial_state = any(partial_state_flags.values()) + # Get the state of the events we know about ours = await self._state_storage_controller.get_state_groups_ids( - room_id, seen + room_id, seen, await_full_state=False ) # state_maps is a list of mappings from (type, state_key) to event_id @@ -934,7 +942,7 @@ async def _resolve_state_at_missing_prevs( "We can't get valid state history.", affected=event_id, ) - return state_map + return state_map, partial_state async def _get_state_ids_after_missing_prev_event( self, From 65172100bfae6877391ec36d9673ef790329a9e9 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 8 Jul 2022 15:50:17 +0100 Subject: [PATCH 06/10] Retry persisting events from federation when we lose the partial state race Now that we persist events with partial state, we have to handle `PartialStateConflictError`s, by retrying just once. Signed-off-by: Sean Quah --- synapse/handlers/federation_event.py | 34 +++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f787afa33b2d..cb7d27e81c71 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -816,14 +816,32 @@ async def _process_pulled_event( return try: - state_ids, partial_state = await self._resolve_state_at_missing_prevs(origin, event) - await self._process_received_pdu( - origin, - event, - state_ids=state_ids, - partial_state=partial_state, - backfilled=backfilled, - ) + try: + state_ids, partial_state = await self._resolve_state_at_missing_prevs( + origin, event + ) + await self._process_received_pdu( + origin, + event, + state_ids=state_ids, + partial_state=partial_state, + backfilled=backfilled, + ) + except PartialStateConflictError: + # The room was un-partial stated while we were processing the event. + # Try once more, with full state this time. + state_ids, partial_state = await self._resolve_state_at_missing_prevs( + origin, event + ) + # We ought to have full state now, barring some unlikely race where we left and + # rejoned the room in the background. + await self._process_received_pdu( + origin, + event, + state_ids=state_ids, + partial_state=partial_state, + backfilled=backfilled, + ) except FederationError as e: if e.code == 403: logger.warning("Pulled event %s failed history check.", event_id) From b879d8c8dd13ac62adc0da403f5eac5c3c228f37 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 21 Jul 2022 20:35:55 +0100 Subject: [PATCH 07/10] Add newsfile --- changelog.d/13355.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13355.misc diff --git a/changelog.d/13355.misc b/changelog.d/13355.misc new file mode 100644 index 000000000000..7715075885c7 --- /dev/null +++ b/changelog.d/13355.misc @@ -0,0 +1 @@ +Faster room joins: avoid blocking when pulling events with partially missing prev events. From 7d55d8859dcb58318c4778ef9b1cc029cdea502c Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 22 Jul 2022 15:48:36 +0100 Subject: [PATCH 08/10] Raise an error if we still have partial state on the 2nd try --- synapse/handlers/federation_event.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index cb7d27e81c71..1e8ad4244e2e 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -833,8 +833,15 @@ async def _process_pulled_event( state_ids, partial_state = await self._resolve_state_at_missing_prevs( origin, event ) + # We ought to have full state now, barring some unlikely race where we left and # rejoned the room in the background. + if state_ids is not None and partial_state: + raise AssertionError( + f"Event {event.event_id} still has a partial resolved state " + f"after room {event.room_id} was un-partial stated" + ) + await self._process_received_pdu( origin, event, From 7870d44b320b558bdbeec7185c77738d3705cef1 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Fri, 22 Jul 2022 16:13:33 +0100 Subject: [PATCH 09/10] Add comment for update_state_for_partial_state_event optimisation --- synapse/handlers/federation_event.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 1e8ad4244e2e..c62fce08a2c7 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -542,8 +542,24 @@ async def update_state_for_partial_state_event( destination, event ) + # There are three possible cases for (state_ids, partial_state): + # * `state_ids` and `partial_state` are both `None` if we had all the + # prev_events. The prev_events may or may not have partial state and + # we won't know until we compute the event context. + # * `state_ids` is not `None` and `partial_state` is `False` if we were + # missing any prev_events. We calculated the full state after the + # prev_events. + # * `state_ids` is not `None` and `partial_state` is `True` if we were + # missing some, but not all, prev_events. At least one of the + # prev_events we did have had partial state, so we calculated a partial + # state after the prev_events. + context = None - if not partial_state: + if state_ids is not None and partial_state: + # the state after the prev events is still partial. We can't de-partial + # state the event, so don't bother building the event context. + pass + else: # build a new state group for it if need be context = await self._state_handler.compute_event_context( event, From 765614f5b90676595a7a3c91266cc1868610e2fd Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 25 Jul 2022 15:32:42 +0100 Subject: [PATCH 10/10] Update synapse/handlers/federation_event.py Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/handlers/federation_event.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index c62fce08a2c7..f823c099c1e8 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -547,8 +547,8 @@ async def update_state_for_partial_state_event( # prev_events. The prev_events may or may not have partial state and # we won't know until we compute the event context. # * `state_ids` is not `None` and `partial_state` is `False` if we were - # missing any prev_events. We calculated the full state after the - # prev_events. + # missing some prev_events (but we have full state for any we did + # have). We calculated the full state after the prev_events. # * `state_ids` is not `None` and `partial_state` is `True` if we were # missing some, but not all, prev_events. At least one of the # prev_events we did have had partial state, so we calculated a partial