From f30302db588c0865cba8d1bc966939ab3bac4185 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 18 Oct 2021 15:24:19 -0500 Subject: [PATCH 01/47] Scratch debugging why events appear out of order on remote homeservers --- synapse/events/utils.py | 17 ++-- synapse/handlers/federation.py | 10 +- synapse/handlers/room_batch.py | 9 +- .../databases/main/event_federation.py | 94 +++++++++++++++---- 4 files changed, 100 insertions(+), 30 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 23bd24d96394..895835abee28 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -283,13 +283,13 @@ def format_event_for_client_v1(d: JsonDict) -> JsonDict: def format_event_for_client_v2(d: JsonDict) -> JsonDict: drop_keys = ( - "auth_events", - "prev_events", - "hashes", - "signatures", - "depth", - "origin", - "prev_state", + # "auth_events", + # "prev_events", + # "hashes", + # "signatures", + # "depth", + # "origin", + # "prev_state", ) for key in drop_keys: d.pop(key, None) @@ -340,6 +340,9 @@ def serialize_event( d["event_id"] = e.event_id + # TODO: Remove + d["stream_ordering"] = e.internal_metadata.stream_ordering + if "age_ts" in d["unsigned"]: d["unsigned"]["age"] = time_now_ms - d["unsigned"]["age_ts"] del d["unsigned"]["age_ts"] diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e072efad161f..355291ff452e 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -148,14 +148,14 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room(room_id) ) - logger.debug( + logger.info( "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", oldest_events_with_depth, insertion_events_to_be_backfilled, ) if not oldest_events_with_depth and not insertion_events_to_be_backfilled: - logger.debug("Not backfilling as no extremeties found.") + 
logger.info("Not backfilling as no extremeties found.") return False # We only want to paginate if we can actually see the events we'll get, @@ -203,7 +203,7 @@ async def _maybe_backfill_inner( redact=False, check_history_visibility_only=True, ) - logger.debug( + logger.info( "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities ) @@ -230,7 +230,7 @@ async def _maybe_backfill_inner( # much larger factor will result in triggering a backfill request much # earlier than necessary. if current_depth - 2 * limit > max_depth: - logger.debug( + logger.info( "Not backfilling as we don't need to. %d < %d - 2 * %d", max_depth, current_depth, @@ -249,7 +249,7 @@ async def _maybe_backfill_inner( t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth ] - logger.debug( + logger.info( "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s", room_id, current_depth, diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 2f5a3e4d193d..88c7d4b00140 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -353,13 +353,20 @@ async def persist_historical_events( # Events are sorted by (topological_ordering, stream_ordering) # where topological_ordering is just depth. 
for (event, context) in reversed(events_to_persist): - await self.event_creation_handler.handle_new_client_event( + result_event = await self.event_creation_handler.handle_new_client_event( await self.create_requester_for_user_id_from_app_service( event["sender"], app_service_requester.app_service ), event=event, context=context, ) + logger.info( + "result_event depth=%s stream_ordering=%s event_id=%s body=%s", + result_event.depth, + result_event.internal_metadata.stream_ordering, + result_event.event_id, + result_event.content.get("body", None), + ) return event_ids diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 10184d6ae762..c857158648d3 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1014,19 +1014,22 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # Look for the prev_event_id connected to the given event_id query = """ - SELECT depth, prev_event_id FROM event_edges - /* Get the depth of the prev_event_id from the events table */ + SELECT depth, stream_ordering, prev_event_id FROM event_edges + /* Get the depth and stream_ordering of the prev_event_id from the events table */ INNER JOIN events ON prev_event_id = events.event_id - /* Find an event which matches the given event_id */ + /* Look for an edge which matches the given event_id */ WHERE event_edges.event_id = ? AND event_edges.is_state = ? + /* Because we can have many events at the same depth, + * we want to also tie-break and sort on stream_ordering */ + ORDER BY depth DESC, stream_ordering DESC LIMIT ? 
""" # Look for the "insertion" events connected to the given event_id connected_insertion_event_query = """ - SELECT e.depth, i.event_id FROM insertion_event_edges AS i + SELECT e.depth, e.stream_ordering, i.event_id FROM insertion_event_edges AS i /* Get the depth of the insertion event from the events table */ INNER JOIN events AS e USING (event_id) /* Find an insertion event which points via prev_events to the given event_id */ @@ -1036,7 +1039,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # Find any batch connections of a given insertion event batch_connection_query = """ - SELECT e.depth, c.event_id FROM insertion_events AS i + SELECT e.depth, e.stream_ordering, c.event_id FROM insertion_events AS i /* Find the batch that connects to the given insertion event */ INNER JOIN batch_events AS c ON i.next_batch_id = c.batch_id @@ -1055,26 +1058,68 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): queue = PriorityQueue() for event_id in event_list: - depth = self.db_pool.simple_select_one_onecol_txn( + event_lookup_result = self.db_pool.simple_select_one_txn( txn, table="events", keyvalues={"event_id": event_id, "room_id": room_id}, - retcol="depth", + retcols=( + "depth", + "stream_ordering", + ), allow_none=True, ) - if depth: - queue.put((-depth, event_id)) + if event_lookup_result["depth"]: + queue.put( + ( + -event_lookup_result["depth"], + -event_lookup_result["stream_ordering"], + event_id, + ) + ) while not queue.empty() and len(event_results) < limit: try: - _, event_id = queue.get_nowait() + _, _, event_id = queue.get_nowait() except Empty: break if event_id in event_results: continue + event_lookup_result = self.db_pool.simple_select_one_txn( + txn, + table="events", + keyvalues={"event_id": event_id}, + retcols=["type", "depth", "stream_ordering", "content"], + allow_none=True, + ) + + event_json_lookup_result = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_json", + keyvalues={"event_id": 
event_id}, + retcol="json", + allow_none=True, + ) + + ev = db_to_json(event_json_lookup_result) + + if event_lookup_result: + logger.info( + "_get_backfill_events: event_results add event_id=%s type=%s depth=%s stream_ordering=%s content=%s", + event_id, + ev["type"], + ev["depth"], + event_lookup_result["stream_ordering"], + ev["content"].get("body", None), + ) + else: + logger.info( + "_get_backfill_events: event_results event_id=%s failed to lookup", + event_id, + ) + event_results.add(event_id) # Try and find any potential historical batches of message history. @@ -1094,8 +1139,15 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): ) for row in connected_insertion_event_id_results: connected_insertion_event_depth = row[0] - connected_insertion_event = row[1] - queue.put((-connected_insertion_event_depth, connected_insertion_event)) + connected_insertion_event_stream_ordering = row[1] + connected_insertion_event = row[2] + queue.put( + ( + -connected_insertion_event_depth, + -connected_insertion_event_stream_ordering, + connected_insertion_event, + ) + ) # Find any batch connections for the given insertion event txn.execute( @@ -1108,18 +1160,26 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): batch_start_event_id_results, ) for row in batch_start_event_id_results: - if row[1] not in event_results: - queue.put((-row[0], row[1])) + if row[2] not in event_results: + queue.put((-row[0], -row[1], row[2])) txn.execute(query, (event_id, False, limit - len(event_results))) prev_event_id_results = txn.fetchall() - logger.debug( + logger.info( "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) + # TODO: Find out why stream_ordering is all out of order compared to + # when we persisted the events + + # TODO: We should probably skip adding the event itself if we + # branched off onto the insertion event first above. 
Need to make this a + # bit smart so it doesn't skip over the event altogether if we're at + # the end of the historical messages. + for row in prev_event_id_results: - if row[1] not in event_results: - queue.put((-row[0], row[1])) + if row[2] not in event_results: + queue.put((-row[0], -row[1], row[2])) return event_results From 438e2226cca4ace0730faaca6b538384d6975dcc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 18 Oct 2021 16:40:01 -0500 Subject: [PATCH 02/47] Use OrderedDict to gurantee order returned is the same as we were building the list We are using a Dict over a list to gurantee we don't duplicate the event if it's already in there. I assume this is why we were using a Set before. --- synapse/handlers/federation_event.py | 12 ++++++++++++ synapse/storage/databases/main/event_federation.py | 8 ++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 0e455678aaf4..5e068cee097a 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -416,6 +416,18 @@ async def backfill( events = await self._federation_client.backfill( dest, room_id, limit=limit, extremities=extremities ) + logger.info( + "from remote server: got backfill response events=%s", + [ + { + "event_id": ev.event_id, + "type": ev["type"], + "depth": ev["depth"], + "content": ev["content"].get("body", None), + } + for ev in events + ], + ) if not events: return diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index c857158648d3..b9c48eea5631 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import Collection, Dict, Iterable, List, Optional, Set, 
OrderedDict, Tuple from prometheus_client import Counter, Gauge @@ -1007,7 +1007,7 @@ async def get_backfill_events(self, room_id: str, event_list: list, limit: int): def _get_backfill_events(self, txn, room_id, event_list, limit): logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) - event_results = set() + event_results = OrderedDict() # We want to make sure that we do a breadth-first, "depth" ordered # search. @@ -1120,7 +1120,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): event_id, ) - event_results.add(event_id) + event_results[event_id] = event_id # Try and find any potential historical batches of message history. # @@ -1181,7 +1181,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if row[2] not in event_results: queue.put((-row[0], -row[1], row[2])) - return event_results + return event_results.values() async def get_missing_events(self, room_id, earliest_events, latest_events, limit): ids = await self.db_pool.runInteraction( From 49837391561067bdc23cab35e8a3067b693f90e9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 18 Oct 2021 19:22:40 -0500 Subject: [PATCH 03/47] Avoid constant missing prev_event fetching while backfilling Persist backfilled event response from oldest -> newest to avoid having to go fetch missing prev_events which de-outliers every other event and screws up the stream_ordering. Missing prev_events aren't fetched as "backfilled" so the stream_ordering was incrementing. This helps us in MSC2716 land where we can more easily copy a similar stream_ordering that the originating homeserver has. 
--- synapse/handlers/federation_event.py | 32 ++++++++++++++++++- synapse/rest/client/room.py | 2 ++ .../databases/main/event_federation.py | 16 ++++------ synapse/storage/databases/main/events.py | 8 +++++ synapse/storage/databases/main/stream.py | 2 ++ synapse/storage/persist_events.py | 5 +++ 6 files changed, 55 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 5e068cee097a..80b314ec148e 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -441,7 +441,37 @@ async def backfill( f"room {ev.room_id}, when we were backfilling in {room_id}" ) - await self._process_pulled_events(dest, events, backfilled=True) + await self._process_pulled_events( + dest, + # The /backfill response should start from `?v` and include the + # events that preceded it (so the list will be newest -> oldest). We + # reverse that order so the messages are oldest -> newest and we can + # persist the backfilled events without constantly have to go fetch + # missing prev_events which are probably included in the same + # backfill chunk. 
+ reversed(events), + backfilled=True, + ) + + for ev in events: + event_after_persisted = await self._store.get_event( + ev.event_id, allow_none=True + ) + + if event_after_persisted: + logger.info( + "from remote server: processed backfilled event_id=%s type=%s depth=%s stream_ordering=%s content=%s", + ev.event_id, + event_after_persisted["type"], + event_after_persisted["depth"], + event_after_persisted.internal_metadata.stream_ordering, + event_after_persisted["content"].get("body", None), + ) + else: + logger.info( + "from remote server: processed backfilled event_id=%s failed to lookup", + ev.event_id, + ) async def _get_missing_events_for_pdu( self, origin: str, pdu: EventBase, prevs: Set[str], min_depth: int diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index ed95189b6d8b..43cfb46c068b 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -561,6 +561,7 @@ async def on_GET( pagination_config = await PaginationConfig.from_request( self.store, request, default_limit=10 ) + logger.info("/messages rest start pagination_config=%s", pagination_config) # Twisted will have processed the args by now. 
assert request.args is not None as_client_event = b"raw" not in request.args @@ -585,6 +586,7 @@ async def on_GET( event_filter=event_filter, ) + logger.info("/messages rest end msgs=%s", msgs) return 200, msgs diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b9c48eea5631..3d20bb884566 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1141,13 +1141,14 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_depth = row[0] connected_insertion_event_stream_ordering = row[1] connected_insertion_event = row[2] - queue.put( - ( - -connected_insertion_event_depth, - -connected_insertion_event_stream_ordering, - connected_insertion_event, + if connected_insertion_event not in event_results: + queue.put( + ( + -connected_insertion_event_depth, + -connected_insertion_event_stream_ordering, + connected_insertion_event, + ) ) - ) # Find any batch connections for the given insertion event txn.execute( @@ -1169,9 +1170,6 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) - # TODO: Find out why stream_ordering is all out of order compared to - # when we persisted the events - # TODO: We should probably skip adding the event itself if we # branched off onto the insertion event first above. 
Need to make this a # bit smart so it doesn't skip over the event altogether if we're at diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 37439f85628e..56d265213290 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -169,6 +169,14 @@ async def _persist_events_and_state_updates( async with stream_ordering_manager as stream_orderings: for (event, _), stream in zip(events_and_contexts, stream_orderings): + logger.info( + "_persist_events_and_state_updates backfilled=%s event_id=%s depth=%s stream_ordering=%s content=%s", + backfilled, + event.event_id, + event.depth, + stream, + event["content"].get("body", None), + ) event.internal_metadata.stream_ordering = stream await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index dc7884b1c0c3..d15c38d4b783 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1166,6 +1166,7 @@ def _paginate_room_events_txn( "order": order, } + logger.info("stream: getting events sql=%s args=%s", sql, args) txn.execute(sql, args) # Filter the result set. @@ -1236,6 +1237,7 @@ async def paginate_room_events( event_filter, ) + logger.info("paginate_room_events event_ids(%d)=%s", len(rows), [r.event_id for r in rows]) events = await self.get_events_as_list( [r.event_id for r in rows], get_prev_content=True ) diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 0e8270746d78..58d1c08906b5 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -309,6 +309,11 @@ async def persist_events( matched the transcation ID; the existing event is returned in such a case. 
""" + # logger.info( + # "persist_events backfilled=%s events_and_contexts=%s", + # backfilled, + # events_and_contexts, + # ) partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) From a64bb2e81120159b1387e6c458a220b8182423f5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 18 Oct 2021 19:30:43 -0500 Subject: [PATCH 04/47] Add changelog --- changelog.d/11114.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/11114.bugfix diff --git a/changelog.d/11114.bugfix b/changelog.d/11114.bugfix new file mode 100644 index 000000000000..c6e65df97f90 --- /dev/null +++ b/changelog.d/11114.bugfix @@ -0,0 +1 @@ +Fix [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical messages backfilling in random order on remote homeservers. From 260ca06f0f1e3c22e386185b835d44544d44e264 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 19 Oct 2021 01:55:40 -0500 Subject: [PATCH 05/47] Some more trials of trying to get many many events to backfill in order on remote --- synapse/handlers/federation_event.py | 11 +++++++++-- synapse/rest/client/room.py | 1 - synapse/storage/databases/main/stream.py | 2 -- synapse/storage/persist_events.py | 5 ----- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 80b314ec148e..d9fe4a430cd2 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -449,7 +449,9 @@ async def backfill( # persist the backfilled events without constantly have to go fetch # missing prev_events which are probably included in the same # backfill chunk. 
- reversed(events), + # TODO: If we try to reverse this list, the stream_ordering will be backwards + # reversed(events), + events, backfilled=True, ) @@ -1271,7 +1273,12 @@ def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: return event, context events_to_persist = (x for x in (prep(event) for event in fetched_events) if x) - await self.persist_events_and_notify(room_id, tuple(events_to_persist)) + await self.persist_events_and_notify( + room_id, + tuple(events_to_persist), + # TODO: Maybe this to get fetched missing events during backfill as backfill also :/ + backfilled=True, + ) async def _check_event_auth( self, diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 43cfb46c068b..49f5395b640c 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -586,7 +586,6 @@ async def on_GET( event_filter=event_filter, ) - logger.info("/messages rest end msgs=%s", msgs) return 200, msgs diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index d15c38d4b783..dc7884b1c0c3 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1166,7 +1166,6 @@ def _paginate_room_events_txn( "order": order, } - logger.info("stream: getting events sql=%s args=%s", sql, args) txn.execute(sql, args) # Filter the result set. @@ -1237,7 +1236,6 @@ async def paginate_room_events( event_filter, ) - logger.info("paginate_room_events event_ids(%d)=%s", len(rows), [r.event_id for r in rows]) events = await self.get_events_as_list( [r.event_id for r in rows], get_prev_content=True ) diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 58d1c08906b5..0e8270746d78 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -309,11 +309,6 @@ async def persist_events( matched the transcation ID; the existing event is returned in such a case. 
""" - # logger.info( - # "persist_events backfilled=%s events_and_contexts=%s", - # backfilled, - # events_and_contexts, - # ) partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) From 886071b66b743c0f0f0d25866680d4ba6d1f9bc8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 20 Oct 2021 16:48:23 -0500 Subject: [PATCH 06/47] Fix backfill not picking up batch events connected to non-base insertion events Previously, we would only look for a batch event if the insertion event was connected to something else by prev_event. This is only the case for the base insertion event. And instead, we need to look for a batch event whenever we come across an insertion event. --- scripts-dev/complement.sh | 2 +- synapse/handlers/federation.py | 2 + synapse/handlers/federation_event.py | 2 +- .../databases/main/event_federation.py | 49 ++++++++++++------- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 89af7a4fde89..549477b11fe8 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -go test -v -tags synapse_blacklist,msc2946,msc3083,msc2403,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2403,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/main_test.go ./tests/msc2716_test.go diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 355291ff452e..6bb9fbfa77c4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -270,6 +270,8 @@ async def _maybe_backfill_inner( # request URI to be too long. extremities = dict(sorted_extremeties_tuple[:5]) + logger.info("backfill extremities=%s", extremities) + # Now we need to decide which hosts to hit first. 
# First we try hosts that are already in the room diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index d9fe4a430cd2..177352f8320d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1276,7 +1276,7 @@ def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: await self.persist_events_and_notify( room_id, tuple(events_to_persist), - # TODO: Maybe this to get fetched missing events during backfill as backfill also :/ + # TODO: Maybe this to get fetched missing events during backfill as backfilled also :/ backfilled=True, ) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 3d20bb884566..f7da3cd4eb0a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -18,7 +18,7 @@ from prometheus_client import Counter, Gauge -from synapse.api.constants import MAX_DEPTH +from synapse.api.constants import MAX_DEPTH, EventTypes from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict @@ -1013,8 +1013,8 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # search. 
# Look for the prev_event_id connected to the given event_id - query = """ - SELECT depth, stream_ordering, prev_event_id FROM event_edges + connected_prev_event_query = """ + SELECT depth, stream_ordering, prev_event_id, events.type FROM event_edges /* Get the depth and stream_ordering of the prev_event_id from the events table */ INNER JOIN events ON prev_event_id = events.event_id @@ -1029,7 +1029,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # Look for the "insertion" events connected to the given event_id connected_insertion_event_query = """ - SELECT e.depth, e.stream_ordering, i.event_id FROM insertion_event_edges AS i + SELECT e.depth, e.stream_ordering, i.event_id, e.type FROM insertion_event_edges AS i /* Get the depth of the insertion event from the events table */ INNER JOIN events AS e USING (event_id) /* Find an insertion event which points via prev_events to the given event_id */ @@ -1039,7 +1039,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # Find any batch connections of a given insertion event batch_connection_query = """ - SELECT e.depth, e.stream_ordering, c.event_id FROM insertion_events AS i + SELECT e.depth, e.stream_ordering, c.event_id, e.type FROM insertion_events AS i /* Find the batch that connects to the given insertion event */ INNER JOIN batch_events AS c ON i.next_batch_id = c.batch_id @@ -1063,6 +1063,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): table="events", keyvalues={"event_id": event_id, "room_id": room_id}, retcols=( + "type", "depth", "stream_ordering", ), @@ -1075,12 +1076,13 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): -event_lookup_result["depth"], -event_lookup_result["stream_ordering"], event_id, + event_lookup_result["type"], ) ) while not queue.empty() and len(event_results) < limit: try: - _, _, event_id = queue.get_nowait() + _, _, event_id, event_type = queue.get_nowait() except Empty: break @@ -1125,46 +1127,55 @@ def 
_get_backfill_events(self, txn, room_id, event_list, limit): # Try and find any potential historical batches of message history. # # First we look for an insertion event connected to the current - # event (by prev_event). If we find any, we need to go and try to - # find any batch events connected to the insertion event (by - # batch_id). If we find any, we'll add them to the queue and - # navigate up the DAG like normal in the next iteration of the loop. + # event (by prev_event). If we find any, we'll add them to the queue + # and navigate up the DAG like normal in the next iteration of the + # loop. txn.execute( connected_insertion_event_query, (event_id, limit - len(event_results)) ) connected_insertion_event_id_results = txn.fetchall() - logger.debug( + logger.info( "_get_backfill_events: connected_insertion_event_query %s", connected_insertion_event_id_results, ) for row in connected_insertion_event_id_results: connected_insertion_event_depth = row[0] connected_insertion_event_stream_ordering = row[1] - connected_insertion_event = row[2] - if connected_insertion_event not in event_results: + connected_insertion_event_id = row[2] + connected_insertion_event_type = row[3] + if connected_insertion_event_id not in event_results: queue.put( ( -connected_insertion_event_depth, -connected_insertion_event_stream_ordering, - connected_insertion_event, + connected_insertion_event_id, + connected_insertion_event_type, ) ) + # Second, we need to go and try to find any batch events connected + # to a given insertion event (by batch_id). If we find any, we'll + # add them to the queue and navigate up the DAG like normal in the + # next iteration of the loop. 
+ if event_type == EventTypes.MSC2716_INSERTION: # Find any batch connections for the given insertion event txn.execute( batch_connection_query, - (connected_insertion_event, limit - len(event_results)), + (event_id, limit - len(event_results)), ) batch_start_event_id_results = txn.fetchall() - logger.debug( + logger.info( "_get_backfill_events: batch_start_event_id_results %s", batch_start_event_id_results, ) for row in batch_start_event_id_results: if row[2] not in event_results: - queue.put((-row[0], -row[1], row[2])) + queue.put((-row[0], -row[1], row[2], row[3])) - txn.execute(query, (event_id, False, limit - len(event_results))) + txn.execute( + connected_prev_event_query, + (event_id, False, limit - len(event_results)), + ) prev_event_id_results = txn.fetchall() logger.info( "_get_backfill_events: prev_event_ids %s", prev_event_id_results @@ -1177,7 +1188,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): for row in prev_event_id_results: if row[2] not in event_results: - queue.put((-row[0], -row[1], row[2])) + queue.put((-row[0], -row[1], row[2], row[3])) return event_results.values() From 477c15df721386563f9ee90c35f25a98b978c917 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 20 Oct 2021 22:11:12 -0500 Subject: [PATCH 07/47] Some more debug logging --- synapse/federation/federation_server.py | 2 ++ synapse/handlers/federation.py | 3 ++- synapse/handlers/room_batch.py | 8 ++++---- synapse/storage/databases/main/event_federation.py | 13 +++++++++++++ 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index d8c0b86f2301..015b61bf63a0 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -196,6 +196,8 @@ async def on_backfill_request( origin, room_id, versions, limit ) + logger.info("on_backfill_request pdus(%d)=%s", len(pdus), pdus) + res = self._transaction_dict_from_pdus(pdus) return 200, 
res diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6bb9fbfa77c4..98feca5e24c3 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -250,11 +250,12 @@ async def _maybe_backfill_inner( ] logger.info( - "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems: %s filtered_sorted_extremeties_tuple: %s", + "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems (%d): %s filtered_sorted_extremeties_tuple: %s", room_id, current_depth, limit, max_depth, + len(sorted_extremeties_tuple), sorted_extremeties_tuple, filtered_sorted_extremeties_tuple, ) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 88c7d4b00140..c7ee6836e221 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -184,7 +184,7 @@ async def persist_state_events_at_start( # Make the state events float off on their own so we don't have a # bunch of `@mxid joined the room` noise between each batch - prev_event_id_for_state_chain = generate_fake_event_id() + prev_event_ids_for_state_chain = [generate_fake_event_id()] for state_event in state_events_at_start: assert_params_in_dict( @@ -221,7 +221,7 @@ async def persist_state_events_at_start( action=membership, content=event_dict["content"], outlier=True, - prev_event_ids=[prev_event_id_for_state_chain], + prev_event_ids=prev_event_ids_for_state_chain, # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. @@ -240,7 +240,7 @@ async def persist_state_events_at_start( ), event_dict, outlier=True, - prev_event_ids=[prev_event_id_for_state_chain], + prev_event_ids=prev_event_ids_for_state_chain, # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. 
@@ -251,7 +251,7 @@ async def persist_state_events_at_start( state_event_ids_at_start.append(event_id) auth_event_ids.append(event_id) # Connect all the state in a floating chain - prev_event_id_for_state_chain = event_id + prev_event_ids_for_state_chain = [event_id] return state_event_ids_at_start diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index f7da3cd4eb0a..145ba7b59347 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1057,6 +1057,11 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # we process the newest-in-time messages first going backwards in time. queue = PriorityQueue() + logger.info( + "_get_backfill_events: seeding backfill with event_list(%d)=%s", + len(event_list), + event_list, + ) for event_id in event_list: event_lookup_result = self.db_pool.simple_select_one_txn( txn, @@ -1070,6 +1075,14 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): allow_none=True, ) + logger.info( + "_get_backfill_events: seeding backfill with event_id=%s type=%s depth=%s stream_ordering=%s", + event_id, + event_lookup_result["type"], + event_lookup_result["depth"], + event_lookup_result["stream_ordering"], + ) + if event_lookup_result["depth"]: queue.put( ( From 4191f5615f42ad720161941b80f68f0bcdc3a797 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 03:44:27 -0500 Subject: [PATCH 08/47] Remove fake prev events from historical state chain Fix https://github.com/matrix-org/synapse/issues/11091 We have to allow creation of events with no prev_events but do have auth_events. And since the historical member events are outliers with no prev_events to resolve them, we want to avoid putting them as backward extremeties. 
--- synapse/federation/federation_server.py | 2 + synapse/handlers/federation.py | 1 + synapse/handlers/federation_event.py | 3 +- synapse/handlers/message.py | 6 +- synapse/handlers/room_batch.py | 11 +++- synapse/handlers/room_member.py | 2 +- .../databases/main/event_federation.py | 2 +- synapse/storage/databases/main/events.py | 65 ++++++++++++++++++- 8 files changed, 83 insertions(+), 9 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 015b61bf63a0..36bcc1d81470 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -200,6 +200,8 @@ async def on_backfill_request( res = self._transaction_dict_from_pdus(pdus) + logger.info("on_backfill_request res=%s", res) + return 200, res async def on_incoming_transaction( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 98feca5e24c3..d915f0b35ccf 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1061,6 +1061,7 @@ async def on_backfill_request( events = await self.store.get_backfill_events(room_id, pdu_list, limit) events = await filter_events_for_server(self.storage, origin, events) + logger.info("on_backfill_request resultant events(%d)=%s", len(events), events) return events diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 177352f8320d..c6eb7d088e41 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -417,7 +417,8 @@ async def backfill( dest, room_id, limit=limit, extremities=extremities ) logger.info( - "from remote server: got backfill response events=%s", + "from remote server: got backfill response events(%d)=%s", + len(events), [ { "event_id": ev.event_id, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 2e024b551f99..f4ae4a392ce7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -955,8 +955,10 @@ 
async def create_new_client_event( # event and then try to auth it (which fails with a somewhat confusing "No # create event in auth events") assert ( - builder.type == EventTypes.Create or len(prev_event_ids) > 0 - ), "Attempting to create an event with no prev_events" + builder.type == EventTypes.Create + or len(prev_event_ids) > 0 + or len(auth_event_ids) > 0 + ), "Attempting to create an event with no prev_events or auth_event_ids" event = await builder.build( prev_event_ids=prev_event_ids, diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index c7ee6836e221..a2b2257d6fe4 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -184,7 +184,7 @@ async def persist_state_events_at_start( # Make the state events float off on their own so we don't have a # bunch of `@mxid joined the room` noise between each batch - prev_event_ids_for_state_chain = [generate_fake_event_id()] + prev_event_ids_for_state_chain = [] # generate_fake_event_id() for state_event in state_events_at_start: assert_params_in_dict( @@ -227,6 +227,15 @@ async def persist_state_events_at_start( # reference and also update in the event when we append later. auth_event_ids=auth_event_ids.copy(), ) + + mem_event = await self.store.get_event(event_id) + logger.info( + "room_batch mem_event_id=%s depth=%s stream_ordering=%s prev_event_ids=%s", + mem_event.event_id, + mem_event.depth, + mem_event.internal_metadata.stream_ordering, + mem_event.prev_event_ids(), + ) else: # TODO: Add some complement tests that adds state that is not member joins # and will use this code path. 
Maybe we only want to support join state events diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 74e6c7eca6b1..3ff82ab229d3 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -644,7 +644,7 @@ async def update_membership_locked( if block_invite: raise SynapseError(403, "Invites have been disabled on this server") - if prev_event_ids: + if prev_event_ids is not None: return await self._local_membership_update( requester=requester, target=target, diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 145ba7b59347..7f5d1b263fa1 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1127,7 +1127,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): ev["type"], ev["depth"], event_lookup_result["stream_ordering"], - ev["content"].get("body", None), + ev["content"].get("body", ev["content"]), ) else: logger.info( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 56d265213290..21f69f578735 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2139,6 +2139,38 @@ def _update_backward_extremeties(self, txn, events): Forward extremities are handled when we first start persisting the events. """ + logger.info( + "_update_backward_extremeties events=%s", + [ + { + "event_id": ev.event_id, + "prev_events": ev.prev_event_ids(), + "outlier": ev.internal_metadata.is_outlier(), + } + for ev in events + ], + ) + + for ev in events: + for e_id in ev.prev_event_ids(): + query = """ + SELECT 1 FROM event_edges + INNER JOIN events AS e USING (event_id, room_id) + WHERE event_id = ? AND room_id = ? 
AND e.outlier = TRUE + """ + + txn.execute( + query, + (e_id, ev.room_id), + ) + result = txn.fetchall() + logger.info( + "_update_backward_extremeties test ev=%s prev_event_id=%s result=%s", + ev.event_id, + e_id, + result, + ) + # From the events passed in, add all of the prev events as backwards extremities. # Ignore any events that are already backwards extrems or outliers. query = ( @@ -2147,22 +2179,45 @@ def _update_backward_extremeties(self, txn, events): " SELECT 1 FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" " )" + # 1. Don't add an event as a extremity again if we already persisted it + # as a non-outlier. + # 2. Don't add an outlier as an extremity if it has no prev_events " AND NOT EXISTS (" - " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " - " AND outlier = ?" + " SELECT 1 FROM events" + " LEFT JOIN event_edges edge" + " ON edge.event_id = events.event_id" + " WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = FALSE OR edge.event_id IS NULL)" " )" ) txn.execute_batch( query, [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id) for ev in events for e_id in ev.prev_event_ids() if not ev.internal_metadata.is_outlier() ], ) + for ev in events: + for e_id in ev.prev_event_ids(): + query = """ + SELECT * FROM event_backward_extremities + WHERE event_id = ? AND room_id = ? + """ + + txn.execute( + query, + (e_id, ev.room_id), + ) + result = txn.fetchall() + logger.info( + "_update_backward_extremeties ended up as prev_event_id=%s result=%s", + e_id, + result, + ) + # Delete all these events that we've already fetched and now know that their # prev events are the new backwards extremeties. 
query = ( @@ -2175,6 +2230,10 @@ def _update_backward_extremeties(self, txn, events): (ev.event_id, ev.room_id) for ev in events if not ev.internal_metadata.is_outlier() + # If we encountered an event with no prev_events, then we might + # as well remove it now because it won't ever have anything else + # to backfill from. + or len(ev.prev_event_ids()) == 0 ], ) From f39c1da083083de30d89406872dfd877d1ddbec2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 04:03:29 -0500 Subject: [PATCH 09/47] Remove debug logging --- synapse/federation/federation_server.py | 4 -- synapse/handlers/federation.py | 13 ++--- synapse/handlers/federation_event.py | 43 +------------- synapse/handlers/room_batch.py | 24 +------- synapse/rest/client/room.py | 1 - .../databases/main/event_federation.py | 52 +---------------- synapse/storage/databases/main/events.py | 58 ------------------- 7 files changed, 11 insertions(+), 184 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 36bcc1d81470..d8c0b86f2301 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -196,12 +196,8 @@ async def on_backfill_request( origin, room_id, versions, limit ) - logger.info("on_backfill_request pdus(%d)=%s", len(pdus), pdus) - res = self._transaction_dict_from_pdus(pdus) - logger.info("on_backfill_request res=%s", res) - return 200, res async def on_incoming_transaction( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index d915f0b35ccf..4e4bbf23a27f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -148,14 +148,14 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room(room_id) ) - logger.info( + logger.debug( "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", oldest_events_with_depth, 
insertion_events_to_be_backfilled, ) if not oldest_events_with_depth and not insertion_events_to_be_backfilled: - logger.info("Not backfilling as no extremeties found.") + logger.debug("Not backfilling as no extremeties found.") return False # We only want to paginate if we can actually see the events we'll get, @@ -203,7 +203,7 @@ async def _maybe_backfill_inner( redact=False, check_history_visibility_only=True, ) - logger.info( + logger.debug( "_maybe_backfill_inner: filtered_extremities %s", filtered_extremities ) @@ -230,7 +230,7 @@ async def _maybe_backfill_inner( # much larger factor will result in triggering a backfill request much # earlier than necessary. if current_depth - 2 * limit > max_depth: - logger.info( + logger.debug( "Not backfilling as we don't need to. %d < %d - 2 * %d", max_depth, current_depth, @@ -249,7 +249,7 @@ async def _maybe_backfill_inner( t for t in sorted_extremeties_tuple if int(t[1]) <= current_depth ] - logger.info( + logger.debug( "room_id: %s, backfill: current_depth: %s, limit: %s, max_depth: %s, extrems (%d): %s filtered_sorted_extremeties_tuple: %s", room_id, current_depth, @@ -271,8 +271,6 @@ async def _maybe_backfill_inner( # request URI to be too long. extremities = dict(sorted_extremeties_tuple[:5]) - logger.info("backfill extremities=%s", extremities) - # Now we need to decide which hosts to hit first. 
# First we try hosts that are already in the room @@ -1061,7 +1059,6 @@ async def on_backfill_request( events = await self.store.get_backfill_events(room_id, pdu_list, limit) events = await filter_events_for_server(self.storage, origin, events) - logger.info("on_backfill_request resultant events(%d)=%s", len(events), events) return events diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index c6eb7d088e41..5edcb91403b5 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -416,19 +416,6 @@ async def backfill( events = await self._federation_client.backfill( dest, room_id, limit=limit, extremities=extremities ) - logger.info( - "from remote server: got backfill response events(%d)=%s", - len(events), - [ - { - "event_id": ev.event_id, - "type": ev["type"], - "depth": ev["depth"], - "content": ev["content"].get("body", None), - } - for ev in events - ], - ) if not events: return @@ -444,38 +431,10 @@ async def backfill( await self._process_pulled_events( dest, - # The /backfill response should start from `?v` and include the - # events that preceded it (so the list will be newest -> oldest). We - # reverse that order so the messages are oldest -> newest and we can - # persist the backfilled events without constantly have to go fetch - # missing prev_events which are probably included in the same - # backfill chunk. 
- # TODO: If we try to reverse this list, the stream_ordering will be backwards - # reversed(events), events, backfilled=True, ) - for ev in events: - event_after_persisted = await self._store.get_event( - ev.event_id, allow_none=True - ) - - if event_after_persisted: - logger.info( - "from remote server: processed backfilled event_id=%s type=%s depth=%s stream_ordering=%s content=%s", - ev.event_id, - event_after_persisted["type"], - event_after_persisted["depth"], - event_after_persisted.internal_metadata.stream_ordering, - event_after_persisted["content"].get("body", None), - ) - else: - logger.info( - "from remote server: processed backfilled event_id=%s failed to lookup", - ev.event_id, - ) - async def _get_missing_events_for_pdu( self, origin: str, pdu: EventBase, prevs: Set[str], min_depth: int ) -> None: @@ -1277,7 +1236,7 @@ def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: await self.persist_events_and_notify( room_id, tuple(events_to_persist), - # TODO: Maybe this to get fetched missing events during backfill as backfilled also :/ + # Events we fetch during backfill should be marked as backfilled as well backfilled=True, ) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index a2b2257d6fe4..17ee1d95964a 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -13,10 +13,6 @@ logger = logging.getLogger(__name__) -def generate_fake_event_id() -> str: - return "$fake_" + random_string(43) - - class RoomBatchHandler: def __init__(self, hs: "HomeServer"): self.hs = hs @@ -184,7 +180,7 @@ async def persist_state_events_at_start( # Make the state events float off on their own so we don't have a # bunch of `@mxid joined the room` noise between each batch - prev_event_ids_for_state_chain = [] # generate_fake_event_id() + prev_event_ids_for_state_chain: List[str] = [] for state_event in state_events_at_start: assert_params_in_dict( @@ -227,15 +223,6 @@ async def 
persist_state_events_at_start( # reference and also update in the event when we append later. auth_event_ids=auth_event_ids.copy(), ) - - mem_event = await self.store.get_event(event_id) - logger.info( - "room_batch mem_event_id=%s depth=%s stream_ordering=%s prev_event_ids=%s", - mem_event.event_id, - mem_event.depth, - mem_event.internal_metadata.stream_ordering, - mem_event.prev_event_ids(), - ) else: # TODO: Add some complement tests that adds state that is not member joins # and will use this code path. Maybe we only want to support join state events @@ -362,20 +349,13 @@ async def persist_historical_events( # Events are sorted by (topological_ordering, stream_ordering) # where topological_ordering is just depth. for (event, context) in reversed(events_to_persist): - result_event = await self.event_creation_handler.handle_new_client_event( + await self.event_creation_handler.handle_new_client_event( await self.create_requester_for_user_id_from_app_service( event["sender"], app_service_requester.app_service ), event=event, context=context, ) - logger.info( - "result_event depth=%s stream_ordering=%s event_id=%s body=%s", - result_event.depth, - result_event.internal_metadata.stream_ordering, - result_event.event_id, - result_event.content.get("body", None), - ) return event_ids diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 49f5395b640c..ed95189b6d8b 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -561,7 +561,6 @@ async def on_GET( pagination_config = await PaginationConfig.from_request( self.store, request, default_limit=10 ) - logger.info("/messages rest start pagination_config=%s", pagination_config) # Twisted will have processed the args by now. 
assert request.args is not None as_client_event = b"raw" not in request.args diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 7f5d1b263fa1..f52ee9c1c824 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1057,11 +1057,6 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # we process the newest-in-time messages first going backwards in time. queue = PriorityQueue() - logger.info( - "_get_backfill_events: seeding backfill with event_list(%d)=%s", - len(event_list), - event_list, - ) for event_id in event_list: event_lookup_result = self.db_pool.simple_select_one_txn( txn, @@ -1075,14 +1070,6 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): allow_none=True, ) - logger.info( - "_get_backfill_events: seeding backfill with event_id=%s type=%s depth=%s stream_ordering=%s", - event_id, - event_lookup_result["type"], - event_lookup_result["depth"], - event_lookup_result["stream_ordering"], - ) - if event_lookup_result["depth"]: queue.put( ( @@ -1102,39 +1089,6 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if event_id in event_results: continue - event_lookup_result = self.db_pool.simple_select_one_txn( - txn, - table="events", - keyvalues={"event_id": event_id}, - retcols=["type", "depth", "stream_ordering", "content"], - allow_none=True, - ) - - event_json_lookup_result = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_json", - keyvalues={"event_id": event_id}, - retcol="json", - allow_none=True, - ) - - ev = db_to_json(event_json_lookup_result) - - if event_lookup_result: - logger.info( - "_get_backfill_events: event_results add event_id=%s type=%s depth=%s stream_ordering=%s content=%s", - event_id, - ev["type"], - ev["depth"], - event_lookup_result["stream_ordering"], - ev["content"].get("body", ev["content"]), - ) - else: - logger.info( - 
"_get_backfill_events: event_results event_id=%s failed to lookup", - event_id, - ) - event_results[event_id] = event_id # Try and find any potential historical batches of message history. @@ -1147,7 +1101,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_query, (event_id, limit - len(event_results)) ) connected_insertion_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: connected_insertion_event_query %s", connected_insertion_event_id_results, ) @@ -1177,7 +1131,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): (event_id, limit - len(event_results)), ) batch_start_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: batch_start_event_id_results %s", batch_start_event_id_results, ) @@ -1190,7 +1144,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): (event_id, False, limit - len(event_results)), ) prev_event_id_results = txn.fetchall() - logger.info( + logger.debug( "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 21f69f578735..cc20f0f6b254 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -169,14 +169,6 @@ async def _persist_events_and_state_updates( async with stream_ordering_manager as stream_orderings: for (event, _), stream in zip(events_and_contexts, stream_orderings): - logger.info( - "_persist_events_and_state_updates backfilled=%s event_id=%s depth=%s stream_ordering=%s content=%s", - backfilled, - event.event_id, - event.depth, - stream, - event["content"].get("body", None), - ) event.internal_metadata.stream_ordering = stream await self.db_pool.runInteraction( @@ -2139,38 +2131,6 @@ def _update_backward_extremeties(self, txn, events): Forward extremities are handled when we first start persisting the events. 
""" - logger.info( - "_update_backward_extremeties events=%s", - [ - { - "event_id": ev.event_id, - "prev_events": ev.prev_event_ids(), - "outlier": ev.internal_metadata.is_outlier(), - } - for ev in events - ], - ) - - for ev in events: - for e_id in ev.prev_event_ids(): - query = """ - SELECT 1 FROM event_edges - INNER JOIN events AS e USING (event_id, room_id) - WHERE event_id = ? AND room_id = ? AND e.outlier = TRUE - """ - - txn.execute( - query, - (e_id, ev.room_id), - ) - result = txn.fetchall() - logger.info( - "_update_backward_extremeties test ev=%s prev_event_id=%s result=%s", - ev.event_id, - e_id, - result, - ) - # From the events passed in, add all of the prev events as backwards extremities. # Ignore any events that are already backwards extrems or outliers. query = ( @@ -2200,24 +2160,6 @@ def _update_backward_extremeties(self, txn, events): ], ) - for ev in events: - for e_id in ev.prev_event_ids(): - query = """ - SELECT * FROM event_backward_extremities - WHERE event_id = ? AND room_id = ? - """ - - txn.execute( - query, - (e_id, ev.room_id), - ) - result = txn.fetchall() - logger.info( - "_update_backward_extremeties ended up as prev_event_id=%s result=%s", - e_id, - result, - ) - # Delete all these events that we've already fetched and now know that their # prev events are the new backwards extremeties. 
query = ( From 7da8012f310ee4914457bf039b9a401b45218d4c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 04:06:03 -0500 Subject: [PATCH 10/47] Remove extra event info --- synapse/events/utils.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 895835abee28..23bd24d96394 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -283,13 +283,13 @@ def format_event_for_client_v1(d: JsonDict) -> JsonDict: def format_event_for_client_v2(d: JsonDict) -> JsonDict: drop_keys = ( - # "auth_events", - # "prev_events", - # "hashes", - # "signatures", - # "depth", - # "origin", - # "prev_state", + "auth_events", + "prev_events", + "hashes", + "signatures", + "depth", + "origin", + "prev_state", ) for key in drop_keys: d.pop(key, None) @@ -340,9 +340,6 @@ def serialize_event( d["event_id"] = e.event_id - # TODO: Remove - d["stream_ordering"] = e.internal_metadata.stream_ordering - if "age_ts" in d["unsigned"]: d["unsigned"]["age"] = time_now_ms - d["unsigned"]["age_ts"] del d["unsigned"]["age_ts"] From 69dfa16dcbe5ed3a677cfe7ebcf06fdacbaa0f37 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 04:19:36 -0500 Subject: [PATCH 11/47] Move to sorting the backfill events in the existing sorted --- .../storage/databases/main/event_federation.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index f52ee9c1c824..59a4433c86f3 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1002,12 +1002,14 @@ async def get_backfill_events(self, room_id: str, event_list: list, limit: int): limit, ) events = await self.get_events_as_list(event_ids) - return sorted(events, key=lambda e: -e.depth) + return sorted( + events, key=lambda e: (-e.depth, 
-e.internal_metadata.stream_ordering) + ) def _get_backfill_events(self, txn, room_id, event_list, limit): logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) - event_results = OrderedDict() + event_results = set() # We want to make sure that we do a breadth-first, "depth" ordered # search. @@ -1089,7 +1091,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if event_id in event_results: continue - event_results[event_id] = event_id + event_results.add(event_id) # Try and find any potential historical batches of message history. # @@ -1148,16 +1150,11 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): "_get_backfill_events: prev_event_ids %s", prev_event_id_results ) - # TODO: We should probably skip adding the event itself if we - # branched off onto the insertion event first above. Need to make this a - # bit smart so it doesn't skip over the event altogether if we're at - # the end of the historical messages. - for row in prev_event_id_results: if row[2] not in event_results: queue.put((-row[0], -row[1], row[2], row[3])) - return event_results.values() + return event_results async def get_missing_events(self, room_id, earliest_events, latest_events, limit): ids = await self.db_pool.runInteraction( From 83474d915811bea054df0a2f09f6cdd6032bc822 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 04:31:10 -0500 Subject: [PATCH 12/47] Put MSC2716 backfill logic behind experimental feature flag --- synapse/handlers/federation.py | 11 ++- .../databases/main/event_federation.py | 99 ++++++++++--------- 2 files changed, 60 insertions(+), 50 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 4e4bbf23a27f..5aed4dd23018 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -145,9 +145,14 @@ async def _maybe_backfill_inner( oldest_events_with_depth = ( await self.store.get_oldest_event_ids_with_depth_in_room(room_id) ) - 
insertion_events_to_be_backfilled = ( - await self.store.get_insertion_event_backwards_extremities_in_room(room_id) - ) + + insertion_events_to_be_backfilled = [] + if self.hs.config.experimental.msc2716_enabled: + insertion_events_to_be_backfilled = ( + await self.store.get_insertion_event_backwards_extremities_in_room( + room_id + ) + ) logger.debug( "_maybe_backfill_inner: extremities oldest_events_with_depth=%s insertion_events_to_be_backfilled=%s", oldest_events_with_depth, diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 59a4433c86f3..dd828fe5cf4c 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Collection, Dict, Iterable, List, Optional, Set, OrderedDict, Tuple +from typing import Collection, Dict, Iterable, List, Optional, OrderedDict, Set, Tuple from prometheus_client import Counter, Gauge @@ -62,6 +62,8 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) + self.hs = hs + if hs.config.worker.run_background_tasks: hs.get_clock().looping_call( self._delete_old_forward_extrem_cache, 60 * 60 * 1000 @@ -1053,10 +1055,11 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): """ # In a PriorityQueue, the lowest valued entries are retrieved first. - # We're using depth as the priority in the queue. - # Depth is lowest at the oldest-in-time message and highest and - # newest-in-time message. We add events to the queue with a negative depth so that - # we process the newest-in-time messages first going backwards in time. + # We're using depth as the priority in the queue and tie-break based on + # stream_ordering. 
Depth is lowest at the oldest-in-time message and + # highest and newest-in-time message. We add events to the queue with a + # negative depth so that we process the newest-in-time messages first + # going backwards in time. stream_ordering follows the same pattern. queue = PriorityQueue() for event_id in event_list: @@ -1093,53 +1096,55 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): event_results.add(event_id) - # Try and find any potential historical batches of message history. - # - # First we look for an insertion event connected to the current - # event (by prev_event). If we find any, we'll add them to the queue - # and navigate up the DAG like normal in the next iteration of the - # loop. - txn.execute( - connected_insertion_event_query, (event_id, limit - len(event_results)) - ) - connected_insertion_event_id_results = txn.fetchall() - logger.debug( - "_get_backfill_events: connected_insertion_event_query %s", - connected_insertion_event_id_results, - ) - for row in connected_insertion_event_id_results: - connected_insertion_event_depth = row[0] - connected_insertion_event_stream_ordering = row[1] - connected_insertion_event_id = row[2] - connected_insertion_event_type = row[3] - if connected_insertion_event_id not in event_results: - queue.put( - ( - -connected_insertion_event_depth, - -connected_insertion_event_stream_ordering, - connected_insertion_event_id, - connected_insertion_event_type, - ) - ) - - # Second, we need to go and try to find any batch events connected - # to a given insertion event (by batch_id). If we find any, we'll - # add them to the queue and navigate up the DAG like normal in the - # next iteration of the loop. - if event_type == EventTypes.MSC2716_INSERTION: - # Find any batch connections for the given insertion event + if self.hs.config.experimental.msc2716_enabled: + # Try and find any potential historical batches of message history. 
+ # + # First we look for an insertion event connected to the current + # event (by prev_event). If we find any, we'll add them to the queue + # and navigate up the DAG like normal in the next iteration of the + # loop. txn.execute( - batch_connection_query, + connected_insertion_event_query, (event_id, limit - len(event_results)), ) - batch_start_event_id_results = txn.fetchall() + connected_insertion_event_id_results = txn.fetchall() logger.debug( - "_get_backfill_events: batch_start_event_id_results %s", - batch_start_event_id_results, + "_get_backfill_events: connected_insertion_event_query %s", + connected_insertion_event_id_results, ) - for row in batch_start_event_id_results: - if row[2] not in event_results: - queue.put((-row[0], -row[1], row[2], row[3])) + for row in connected_insertion_event_id_results: + connected_insertion_event_depth = row[0] + connected_insertion_event_stream_ordering = row[1] + connected_insertion_event_id = row[2] + connected_insertion_event_type = row[3] + if connected_insertion_event_id not in event_results: + queue.put( + ( + -connected_insertion_event_depth, + -connected_insertion_event_stream_ordering, + connected_insertion_event_id, + connected_insertion_event_type, + ) + ) + + # Second, we need to go and try to find any batch events connected + # to a given insertion event (by batch_id). If we find any, we'll + # add them to the queue and navigate up the DAG like normal in the + # next iteration of the loop. 
+ if event_type == EventTypes.MSC2716_INSERTION: + # Find any batch connections for the given insertion event + txn.execute( + batch_connection_query, + (event_id, limit - len(event_results)), + ) + batch_start_event_id_results = txn.fetchall() + logger.debug( + "_get_backfill_events: batch_start_event_id_results %s", + batch_start_event_id_results, + ) + for row in batch_start_event_id_results: + if row[2] not in event_results: + queue.put((-row[0], -row[1], row[2], row[3])) txn.execute( connected_prev_event_query, From 1263c7e2a926eca44d9bc364fade4fdf62536004 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 04:39:34 -0500 Subject: [PATCH 13/47] Remove unused import --- synapse/storage/databases/main/event_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index dd828fe5cf4c..2cbc6343705f 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Collection, Dict, Iterable, List, Optional, OrderedDict, Set, Tuple +from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple from prometheus_client import Counter, Gauge From ee47878439e916bced304eea3ce3cdbb91d18628 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 04:49:28 -0500 Subject: [PATCH 14/47] Fix mypy lints --- synapse/handlers/federation.py | 2 +- synapse/handlers/message.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5aed4dd23018..2d60ea6d8047 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -146,7 +146,7 @@ async def _maybe_backfill_inner( await self.store.get_oldest_event_ids_with_depth_in_room(room_id) ) - 
insertion_events_to_be_backfilled = [] + insertion_events_to_be_backfilled: Dict[str, int] = {} if self.hs.config.experimental.msc2716_enabled: insertion_events_to_be_backfilled = ( await self.store.get_insertion_event_backwards_extremities_in_room( diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f4ae4a392ce7..1b872281f633 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -957,7 +957,9 @@ async def create_new_client_event( assert ( builder.type == EventTypes.Create or len(prev_event_ids) > 0 - or len(auth_event_ids) > 0 + # Allow an event to have empty list of prev_event_ids + # only if it has auth_event_ids. + or (auth_event_ids and len(auth_event_ids) > 0) ), "Attempting to create an event with no prev_events or auth_event_ids" event = await builder.build( From 1d3f4170464309d475d964ef098d08b303519ad4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 18:49:30 -0500 Subject: [PATCH 15/47] Revert back to string interpolation for SQL boolean value Maybe fixes the `sqlite3.OperationalError: no such column: FALSE,` problem in CI, https://github.com/matrix-org/synapse/runs/3962382283#step:4:11038 --- synapse/storage/databases/main/events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cc20f0f6b254..e4f5c8624437 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2146,14 +2146,14 @@ def _update_backward_extremeties(self, txn, events): " SELECT 1 FROM events" " LEFT JOIN event_edges edge" " ON edge.event_id = events.event_id" - " WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = FALSE OR edge.event_id IS NULL)" + " WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = ? 
OR edge.event_id IS NULL)" " )" ) txn.execute_batch( query, [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id) + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) for ev in events for e_id in ev.prev_event_ids() if not ev.internal_metadata.is_outlier() From 4a12304cf77f88f66d494b8f6e93023bd44bc318 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 28 Oct 2021 01:42:46 -0500 Subject: [PATCH 16/47] Put empty prev_events behind new room version See https://github.com/matrix-org/synapse/pull/11114#discussion_r733475645 --- synapse/api/room_versions.py | 31 +++++++++++++++++++++++++++++++ synapse/handlers/message.py | 26 ++++++++++++++++---------- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 0a895bba480a..7ef4701b137a 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -81,6 +81,8 @@ class RoomVersion: msc2716_historical = attr.ib(type=bool) # MSC2716: Adds support for redacting "insertion", "chunk", and "marker" events msc2716_redactions = attr.ib(type=bool) + # MSC2716: Adds support for events with no `prev_events` but with some `auth_events` + msc2716_empty_prev_events = attr.ib(type=bool) class RoomVersions: @@ -99,6 +101,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V2 = RoomVersion( "2", @@ -115,6 +118,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V3 = RoomVersion( "3", @@ -131,6 +135,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V4 = RoomVersion( "4", @@ -147,6 +152,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V5 = RoomVersion( "5", @@ -163,6 +169,7 @@ class 
RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V6 = RoomVersion( "6", @@ -179,6 +186,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) MSC2176 = RoomVersion( "org.matrix.msc2176", @@ -195,6 +203,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V7 = RoomVersion( "7", @@ -211,6 +220,7 @@ class RoomVersions: msc2403_knocking=True, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V8 = RoomVersion( "8", @@ -227,6 +237,7 @@ class RoomVersions: msc2403_knocking=True, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) V9 = RoomVersion( "9", @@ -243,6 +254,7 @@ class RoomVersions: msc2403_knocking=True, msc2716_historical=False, msc2716_redactions=False, + msc2716_empty_prev_events=False, ) MSC2716v3 = RoomVersion( "org.matrix.msc2716v3", @@ -259,6 +271,24 @@ class RoomVersions: msc2403_knocking=True, msc2716_historical=True, msc2716_redactions=True, + msc2716_empty_prev_events=False, + ) + MSC2716v4 = RoomVersion( + "org.matrix.msc2716v4", + RoomDisposition.UNSTABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=True, + special_case_aliases_auth=False, + strict_canonicaljson=True, + limit_notifications_power_levels=True, + msc2176_redaction_rules=False, + msc3083_join_rules=False, + msc3375_redaction_rules=False, + msc2403_knocking=True, + msc2716_historical=True, + msc2716_redactions=True, + msc2716_empty_prev_events=True, ) @@ -276,6 +306,7 @@ class RoomVersions: RoomVersions.V8, RoomVersions.V9, RoomVersions.MSC2716v3, + RoomVersions.MSC2716v4, ) } diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1b872281f633..d6f0b99f5887 100644 --- a/synapse/handlers/message.py +++ 
b/synapse/handlers/message.py @@ -949,18 +949,24 @@ async def create_new_client_event( else: prev_event_ids = await self.store.get_prev_events_for_room(builder.room_id) - # we now ought to have some prev_events (unless it's a create event). - # - # do a quick sanity check here, rather than waiting until we've created the + # Do a quick sanity check here, rather than waiting until we've created the # event and then try to auth it (which fails with a somewhat confusing "No # create event in auth events") - assert ( - builder.type == EventTypes.Create - or len(prev_event_ids) > 0 - # Allow an event to have empty list of prev_event_ids - # only if it has auth_event_ids. - or (auth_event_ids and len(auth_event_ids) > 0) - ), "Attempting to create an event with no prev_events or auth_event_ids" + room_version_obj = await self.store.get_room_version(builder.room_id) + if room_version_obj.msc2716_empty_prev_events: + # We allow events with no `prev_events` but it better have some `auth_events` + assert ( + builder.type == EventTypes.Create + or len(prev_event_ids) > 0 + # Allow an event to have empty list of prev_event_ids + # only if it has auth_event_ids. + or (auth_event_ids and len(auth_event_ids) > 0) + ), "Attempting to create an event with no prev_events or auth_event_ids" + else: + # we now ought to have some prev_events (unless it's a create event). + assert ( + builder.type == EventTypes.Create or len(prev_event_ids) > 0 + ), "Attempting to create an event with no prev_events" event = await builder.build( prev_event_ids=prev_event_ids, From 9a6d8faafe3c09d5f70767637a1f6fe1ff7d231a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 28 Oct 2021 20:24:00 -0500 Subject: [PATCH 17/47] WIP: Don't include the event we branch from We want to backfill all of the history before adding the base event. But then there is a problem of how do we add the base event after exhausting all of the historical messages. 
Backfill will give us that extremity again but the current code will always choose the historical branch over and over and never move past it. I wish we could ask the federated homeserver if it already has the insertion event locally but we can't make any requests in the store code here :/ --- .../databases/main/event_federation.py | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 4a4d35f77c5e..779f3e81816f 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1094,8 +1094,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if event_id in event_results: continue - event_results.add(event_id) - + found_connected_historical_messages = False if self.hs.config.experimental.msc2716_enabled: # Try and find any potential historical batches of message history. # @@ -1117,7 +1116,9 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_stream_ordering = row[1] connected_insertion_event_id = row[2] connected_insertion_event_type = row[3] + if connected_insertion_event_id not in event_results: + found_connected_historical_messages = True queue.put( ( -connected_insertion_event_depth, @@ -1146,18 +1147,26 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if row[2] not in event_results: queue.put((-row[0], -row[1], row[2], row[3])) - txn.execute( - connected_prev_event_query, - (event_id, False, limit - len(event_results)), - ) - prev_event_id_results = txn.fetchall() - logger.debug( - "_get_backfill_events: prev_event_ids %s", prev_event_id_results - ) + # Only add the event_result itself if we didn't branch off on the history first + # TODO: How can we not branch off to the historical batch if + # the federated homeserver already has it backfilled? 
We + # can't make any requests here (no async stuff and should + # really only be database calls) + if not found_connected_historical_messages: + event_results.add(event_id) + + txn.execute( + connected_prev_event_query, + (event_id, False, limit - len(event_results)), + ) + prev_event_id_results = txn.fetchall() + logger.debug( + "_get_backfill_events: prev_event_ids %s", prev_event_id_results + ) - for row in prev_event_id_results: - if row[2] not in event_results: - queue.put((-row[0], -row[1], row[2], row[3])) + for row in prev_event_id_results: + if row[2] not in event_results: + queue.put((-row[0], -row[1], row[2], row[3])) return event_results From 3e09d4900012a42c6cd9ff4e7cd352156ded0069 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 28 Oct 2021 20:40:57 -0500 Subject: [PATCH 18/47] Revert "WIP: Don't include the event we branch from" This reverts commit 9a6d8faafe3c09d5f70767637a1f6fe1ff7d231a. --- .../databases/main/event_federation.py | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 779f3e81816f..4a4d35f77c5e 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1094,7 +1094,8 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if event_id in event_results: continue - found_connected_historical_messages = False + event_results.add(event_id) + if self.hs.config.experimental.msc2716_enabled: # Try and find any potential historical batches of message history. 
# @@ -1116,9 +1117,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): connected_insertion_event_stream_ordering = row[1] connected_insertion_event_id = row[2] connected_insertion_event_type = row[3] - if connected_insertion_event_id not in event_results: - found_connected_historical_messages = True queue.put( ( -connected_insertion_event_depth, @@ -1147,26 +1146,18 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if row[2] not in event_results: queue.put((-row[0], -row[1], row[2], row[3])) - # Only add the event_result itself if we didn't branch off on the history first - # TODO: How can we not branch off to the historical batch if - # the federated homeserver already has it backfilled? We - # can't make any requests here (no async stuff and should - # really only be database calls) - if not found_connected_historical_messages: - event_results.add(event_id) - - txn.execute( - connected_prev_event_query, - (event_id, False, limit - len(event_results)), - ) - prev_event_id_results = txn.fetchall() - logger.debug( - "_get_backfill_events: prev_event_ids %s", prev_event_id_results - ) + txn.execute( + connected_prev_event_query, + (event_id, False, limit - len(event_results)), + ) + prev_event_id_results = txn.fetchall() + logger.debug( + "_get_backfill_events: prev_event_ids %s", prev_event_id_results + ) - for row in prev_event_id_results: - if row[2] not in event_results: - queue.put((-row[0], -row[1], row[2], row[3])) + for row in prev_event_id_results: + if row[2] not in event_results: + queue.put((-row[0], -row[1], row[2], row[3])) return event_results From 5afc264dd54221142b0acd0a56cbe07c3eac2113 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 29 Oct 2021 02:43:08 -0500 Subject: [PATCH 19/47] WIP: Sort events topologically when we receive them over backfill --- synapse/handlers/federation_event.py | 125 ++++++++++++++++++- synapse/handlers/message.py | 2 +- synapse/rest/client/room_batch.py | 2 +- 
synapse/storage/databases/main/room_batch.py | 2 +- 4 files changed, 126 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 610a4e48c53b..66d3da871900 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -72,7 +72,7 @@ get_domain_from_id, ) from synapse.util.async_helpers import Linearizer, concurrently_execute -from synapse.util.iterutils import batch_iter +from synapse.util.iterutils import batch_iter, sorted_topologically from synapse.util.retryutils import NotRetryingDestination from synapse.util.stringutils import shortstr @@ -665,9 +665,130 @@ async def _process_pulled_events( notification to clients, and validation of device keys.) """ + logger.info( + "backfill events=%s", + [ + "event_id=%s,depth=%d,body=%s,prevs=%s\n" + % ( + event.event_id, + event.depth, + event.content.get("body", event.type), + event.prev_event_ids(), + ) + for event in events + ], + ) + # We want to sort these by depth so we process them and # tell clients about them in order. - sorted_events = sorted(events, key=lambda x: x.depth) + # sorted_events = sorted(events, key=lambda x: x.depth) + + event_ids = [event.event_id for event in events] + event_map = {event.event_id: event for event in events} + + # Since the insertion event we try to reference later on might be in the + # backfill chunk itself, we need to make it easy to lookup. Maps a given + # batch_id to the insertion event. + batch_id_map = { + event.content.get( + EventContentFields.MSC2716_NEXT_BATCH_ID, None + ): event.event_id + for event in events + if event.type == EventTypes.MSC2716_INSERTION + } + + successor_event_id_map = {} + for event in events: + for prev_event_id in event.prev_event_ids(): + successor_event_id_map.setdefault(prev_event_id, []).append( + event.event_id + ) + + event_id_graph = {} + for event in events: + # Assign the real edges to the graph. 
+ # Make a copy so we don't modify the actual prev_events when we extend them below. + event_id_graph.setdefault(event.event_id, []).extend( + event.prev_event_ids().copy() + ) + + # We need to make some fake edge connections from the batch event at + # the bottom of the historical batch to the insertion event. This + # way the historical batch topologically sorts in ahead-in-time of + # the event we branched off of. + batch_id = event.content.get(EventContentFields.MSC2716_BATCH_ID, None) + if event.type == EventTypes.MSC2716_BATCH and batch_id: + # Maybe we can get lucky and save ourselves a lookup + # by checking the events in the backfill first + insertion_event_id = batch_id_map[ + batch_id + ] or await self._store.get_insertion_event_id_by_batch_id( + event.room_id, batch_id + ) + + if insertion_event_id: + # Add the insertion event as a fake edge connection to the batch + # event so the historical batch topologically sorts below + # the "live" event we branched off of. + event_id_graph.setdefault(event.event_id, []).append( + insertion_event_id + ) + + # Maybe we can get lucky and save ourselves a lookup + # by checking the events in the backfill first + insertion_event = event_map[ + insertion_event_id + ] or await self._store.get_event( + insertion_event_id, allow_none=True + ) + + if insertion_event: + # Also add some fake edges to connect the insertion + # event to it's prev_event successors so it sorts + # topologically behind-in-time the successor. Nestled + # perfectly between the prev_event and the successor. 
+ for insertion_prev_event_id in insertion_event.prev_event_ids(): + successor_event_ids = successor_event_id_map[ + insertion_prev_event_id + ] + logger.info( + "insertion_event_id=%s successor_event_ids=%s", + insertion_event_id, + successor_event_ids, + ) + if successor_event_ids: + + event_id_graph.setdefault( + insertion_event_id, [] + ).extend( + [ + successor_event_id + for successor_event_id in successor_event_ids + # Don't add itself back as a successor + if successor_event_id != insertion_event_id + ] + ) + + # We want to sort topologically so we process them and tell clients + # about them in order. + sorted_events = [] + for event_id in sorted_topologically(event_ids, event_id_graph): + sorted_events.append(event_map[event_id]) + sorted_events = reversed(sorted_events) + + logger.info( + "backfill sorted_events=%s", + [ + "event_id=%s,depth=%d,body=%s,prevs=%s\n" + % ( + event.event_id, + event.depth, + event.content.get("body", event.type), + event.prev_event_ids(), + ) + for event in sorted_events + ], + ) for ev in sorted_events: with nested_logging_context(ev.event_id): diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d6f0b99f5887..2f4b458d4564 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1511,7 +1511,7 @@ async def persist_and_notify_client_event( EventContentFields.MSC2716_NEXT_BATCH_ID ) conflicting_insertion_event_id = ( - await self.store.get_insertion_event_by_batch_id( + await self.store.get_insertion_event_id_by_batch_id( event.room_id, next_batch_id ) ) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 99f8156ad0ec..5423d39efde1 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -112,7 +112,7 @@ async def on_POST( # and have the batch connected. 
if batch_id_from_query: corresponding_insertion_event_id = ( - await self.store.get_insertion_event_by_batch_id( + await self.store.get_insertion_event_id_by_batch_id( room_id, batch_id_from_query ) ) diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index dcbce8fdcf03..97b261843782 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -18,7 +18,7 @@ class RoomBatchStore(SQLBaseStore): - async def get_insertion_event_by_batch_id( + async def get_insertion_event_id_by_batch_id( self, room_id: str, batch_id: str ) -> Optional[str]: """Retrieve a insertion event ID. From 6ea263b73ba1d0e275ef4fd266c4a9144198ebb9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 29 Oct 2021 02:43:17 -0500 Subject: [PATCH 20/47] Revert "WIP: Sort events topologically when we receive them over backfill" This reverts commit 5afc264dd54221142b0acd0a56cbe07c3eac2113. --- synapse/handlers/federation_event.py | 125 +------------------ synapse/handlers/message.py | 2 +- synapse/rest/client/room_batch.py | 2 +- synapse/storage/databases/main/room_batch.py | 2 +- 4 files changed, 5 insertions(+), 126 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 66d3da871900..610a4e48c53b 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -72,7 +72,7 @@ get_domain_from_id, ) from synapse.util.async_helpers import Linearizer, concurrently_execute -from synapse.util.iterutils import batch_iter, sorted_topologically +from synapse.util.iterutils import batch_iter from synapse.util.retryutils import NotRetryingDestination from synapse.util.stringutils import shortstr @@ -665,130 +665,9 @@ async def _process_pulled_events( notification to clients, and validation of device keys.) 
""" - logger.info( - "backfill events=%s", - [ - "event_id=%s,depth=%d,body=%s,prevs=%s\n" - % ( - event.event_id, - event.depth, - event.content.get("body", event.type), - event.prev_event_ids(), - ) - for event in events - ], - ) - # We want to sort these by depth so we process them and # tell clients about them in order. - # sorted_events = sorted(events, key=lambda x: x.depth) - - event_ids = [event.event_id for event in events] - event_map = {event.event_id: event for event in events} - - # Since the insertion event we try to reference later on might be in the - # backfill chunk itself, we need to make it easy to lookup. Maps a given - # batch_id to the insertion event. - batch_id_map = { - event.content.get( - EventContentFields.MSC2716_NEXT_BATCH_ID, None - ): event.event_id - for event in events - if event.type == EventTypes.MSC2716_INSERTION - } - - successor_event_id_map = {} - for event in events: - for prev_event_id in event.prev_event_ids(): - successor_event_id_map.setdefault(prev_event_id, []).append( - event.event_id - ) - - event_id_graph = {} - for event in events: - # Assign the real edges to the graph. - # Make a copy so we don't modify the actual prev_events when we extend them below. - event_id_graph.setdefault(event.event_id, []).extend( - event.prev_event_ids().copy() - ) - - # We need to make some fake edge connections from the batch event at - # the bottom of the historical batch to the insertion event. This - # way the historical batch topologically sorts in ahead-in-time of - # the event we branched off of. 
- batch_id = event.content.get(EventContentFields.MSC2716_BATCH_ID, None) - if event.type == EventTypes.MSC2716_BATCH and batch_id: - # Maybe we can get lucky and save ourselves a lookup - # by checking the events in the backfill first - insertion_event_id = batch_id_map[ - batch_id - ] or await self._store.get_insertion_event_id_by_batch_id( - event.room_id, batch_id - ) - - if insertion_event_id: - # Add the insertion event as a fake edge connection to the batch - # event so the historical batch topologically sorts below - # the "live" event we branched off of. - event_id_graph.setdefault(event.event_id, []).append( - insertion_event_id - ) - - # Maybe we can get lucky and save ourselves a lookup - # by checking the events in the backfill first - insertion_event = event_map[ - insertion_event_id - ] or await self._store.get_event( - insertion_event_id, allow_none=True - ) - - if insertion_event: - # Also add some fake edges to connect the insertion - # event to it's prev_event successors so it sorts - # topologically behind-in-time the successor. Nestled - # perfectly between the prev_event and the successor. - for insertion_prev_event_id in insertion_event.prev_event_ids(): - successor_event_ids = successor_event_id_map[ - insertion_prev_event_id - ] - logger.info( - "insertion_event_id=%s successor_event_ids=%s", - insertion_event_id, - successor_event_ids, - ) - if successor_event_ids: - - event_id_graph.setdefault( - insertion_event_id, [] - ).extend( - [ - successor_event_id - for successor_event_id in successor_event_ids - # Don't add itself back as a successor - if successor_event_id != insertion_event_id - ] - ) - - # We want to sort topologically so we process them and tell clients - # about them in order. 
- sorted_events = [] - for event_id in sorted_topologically(event_ids, event_id_graph): - sorted_events.append(event_map[event_id]) - sorted_events = reversed(sorted_events) - - logger.info( - "backfill sorted_events=%s", - [ - "event_id=%s,depth=%d,body=%s,prevs=%s\n" - % ( - event.event_id, - event.depth, - event.content.get("body", event.type), - event.prev_event_ids(), - ) - for event in sorted_events - ], - ) + sorted_events = sorted(events, key=lambda x: x.depth) for ev in sorted_events: with nested_logging_context(ev.event_id): diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 2f4b458d4564..d6f0b99f5887 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1511,7 +1511,7 @@ async def persist_and_notify_client_event( EventContentFields.MSC2716_NEXT_BATCH_ID ) conflicting_insertion_event_id = ( - await self.store.get_insertion_event_id_by_batch_id( + await self.store.get_insertion_event_by_batch_id( event.room_id, next_batch_id ) ) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 5423d39efde1..99f8156ad0ec 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -112,7 +112,7 @@ async def on_POST( # and have the batch connected. if batch_id_from_query: corresponding_insertion_event_id = ( - await self.store.get_insertion_event_id_by_batch_id( + await self.store.get_insertion_event_by_batch_id( room_id, batch_id_from_query ) ) diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 97b261843782..dcbce8fdcf03 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -18,7 +18,7 @@ class RoomBatchStore(SQLBaseStore): - async def get_insertion_event_id_by_batch_id( + async def get_insertion_event_by_batch_id( self, room_id: str, batch_id: str ) -> Optional[str]: """Retrieve a insertion event ID. 
From 3d387f94fc733775d94ec1dd89b794d1a899ca04 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 29 Oct 2021 02:43:08 -0500 Subject: [PATCH 21/47] WIP: Sort events topologically when we receive them over backfill --- synapse/handlers/federation_event.py | 125 ++++++++++++++++++- synapse/handlers/message.py | 2 +- synapse/rest/client/room_batch.py | 2 +- synapse/storage/databases/main/room_batch.py | 2 +- 4 files changed, 126 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 610a4e48c53b..66d3da871900 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -72,7 +72,7 @@ get_domain_from_id, ) from synapse.util.async_helpers import Linearizer, concurrently_execute -from synapse.util.iterutils import batch_iter +from synapse.util.iterutils import batch_iter, sorted_topologically from synapse.util.retryutils import NotRetryingDestination from synapse.util.stringutils import shortstr @@ -665,9 +665,130 @@ async def _process_pulled_events( notification to clients, and validation of device keys.) """ + logger.info( + "backfill events=%s", + [ + "event_id=%s,depth=%d,body=%s,prevs=%s\n" + % ( + event.event_id, + event.depth, + event.content.get("body", event.type), + event.prev_event_ids(), + ) + for event in events + ], + ) + # We want to sort these by depth so we process them and # tell clients about them in order. - sorted_events = sorted(events, key=lambda x: x.depth) + # sorted_events = sorted(events, key=lambda x: x.depth) + + event_ids = [event.event_id for event in events] + event_map = {event.event_id: event for event in events} + + # Since the insertion event we try to reference later on might be in the + # backfill chunk itself, we need to make it easy to lookup. Maps a given + # batch_id to the insertion event. 
+ batch_id_map = { + event.content.get( + EventContentFields.MSC2716_NEXT_BATCH_ID, None + ): event.event_id + for event in events + if event.type == EventTypes.MSC2716_INSERTION + } + + successor_event_id_map = {} + for event in events: + for prev_event_id in event.prev_event_ids(): + successor_event_id_map.setdefault(prev_event_id, []).append( + event.event_id + ) + + event_id_graph = {} + for event in events: + # Assign the real edges to the graph. + # Make a copy so we don't modify the actual prev_events when we extend them below. + event_id_graph.setdefault(event.event_id, []).extend( + event.prev_event_ids().copy() + ) + + # We need to make some fake edge connections from the batch event at + # the bottom of the historical batch to the insertion event. This + # way the historical batch topologically sorts in ahead-in-time of + # the event we branched off of. + batch_id = event.content.get(EventContentFields.MSC2716_BATCH_ID, None) + if event.type == EventTypes.MSC2716_BATCH and batch_id: + # Maybe we can get lucky and save ourselves a lookup + # by checking the events in the backfill first + insertion_event_id = batch_id_map[ + batch_id + ] or await self._store.get_insertion_event_id_by_batch_id( + event.room_id, batch_id + ) + + if insertion_event_id: + # Add the insertion event as a fake edge connection to the batch + # event so the historical batch topologically sorts below + # the "live" event we branched off of. + event_id_graph.setdefault(event.event_id, []).append( + insertion_event_id + ) + + # Maybe we can get lucky and save ourselves a lookup + # by checking the events in the backfill first + insertion_event = event_map[ + insertion_event_id + ] or await self._store.get_event( + insertion_event_id, allow_none=True + ) + + if insertion_event: + # Also add some fake edges to connect the insertion + # event to it's prev_event successors so it sorts + # topologically behind-in-time the successor. 
Nestled + # perfectly between the prev_event and the successor. + for insertion_prev_event_id in insertion_event.prev_event_ids(): + successor_event_ids = successor_event_id_map[ + insertion_prev_event_id + ] + logger.info( + "insertion_event_id=%s successor_event_ids=%s", + insertion_event_id, + successor_event_ids, + ) + if successor_event_ids: + + event_id_graph.setdefault( + insertion_event_id, [] + ).extend( + [ + successor_event_id + for successor_event_id in successor_event_ids + # Don't add itself back as a successor + if successor_event_id != insertion_event_id + ] + ) + + # We want to sort topologically so we process them and tell clients + # about them in order. + sorted_events = [] + for event_id in sorted_topologically(event_ids, event_id_graph): + sorted_events.append(event_map[event_id]) + sorted_events = reversed(sorted_events) + + logger.info( + "backfill sorted_events=%s", + [ + "event_id=%s,depth=%d,body=%s,prevs=%s\n" + % ( + event.event_id, + event.depth, + event.content.get("body", event.type), + event.prev_event_ids(), + ) + for event in sorted_events + ], + ) for ev in sorted_events: with nested_logging_context(ev.event_id): diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d6f0b99f5887..2f4b458d4564 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1511,7 +1511,7 @@ async def persist_and_notify_client_event( EventContentFields.MSC2716_NEXT_BATCH_ID ) conflicting_insertion_event_id = ( - await self.store.get_insertion_event_by_batch_id( + await self.store.get_insertion_event_id_by_batch_id( event.room_id, next_batch_id ) ) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 99f8156ad0ec..5423d39efde1 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -112,7 +112,7 @@ async def on_POST( # and have the batch connected. 
if batch_id_from_query: corresponding_insertion_event_id = ( - await self.store.get_insertion_event_by_batch_id( + await self.store.get_insertion_event_id_by_batch_id( room_id, batch_id_from_query ) ) diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index dcbce8fdcf03..97b261843782 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -18,7 +18,7 @@ class RoomBatchStore(SQLBaseStore): - async def get_insertion_event_by_batch_id( + async def get_insertion_event_id_by_batch_id( self, room_id: str, batch_id: str ) -> Optional[str]: """Retrieve a insertion event ID. From fb8e2814f1ce78ec0c3df719d2b64ddaa1eb7f4d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 29 Oct 2021 14:10:43 -0500 Subject: [PATCH 22/47] Fix direction of fake edges - batch -> insertion fake edge - Connecting insertion events' `prev_events` successors -> insertion event --- - batch -> insertion fake edge - Connecting insertion events' `prev_events` successors -> insertion event ``` backfill sorted_events=[ "event_id=$RQU5oBY9oHveDdx1X4FZs6BWzBqrGnbxQCZfNF4OEmw,depth=11,body=Message 1 (eventIDsAfter),prevs=['$44dIgrZ6WEBaxnCxiulvFQmLRlgIF2c2LHxwVZ39BhY']", "event_id=$44dIgrZ6WEBaxnCxiulvFQmLRlgIF2c2LHxwVZ39BhY,depth=10,body=Message 0 (eventIDsAfter),prevs=['$G00qI6L-Jem6u0rM73WAgtl3e27WFREG_IqXBK3rBVY']", "event_id=$Bb7NKUT_EJEIYJucAJvsm3IGWOHPV5HjT9sFBH4cgJ0,depth=10,body=org.matrix.msc2716.insertion,prevs=['$G00qI6L-Jem6u0rM73WAgtl3e27WFREG_IqXBK3rBVY']", "event_id=$DD2ibvlCDuylHLA5XHpzbWgplgANwmvy_qRGIz6obgU,depth=10,body=org.matrix.msc2716.batch,prevs=['$JfIXG5RA3SubwPx3qekrP7p7TL84M-EMXyrWUPGEJqE']", "event_id=$JfIXG5RA3SubwPx3qekrP7p7TL84M-EMXyrWUPGEJqE,depth=10,body=Historical 99 (batch=0),prevs=['$JMMtj_q0q-o3sgMjGtUDot_vjIBS1fu7pMFIVIKW8XU']", "event_id=$JMMtj_q0q-o3sgMjGtUDot_vjIBS1fu7pMFIVIKW8XU,depth=10,body=Historical 98 
(batch=0),prevs=['$lCpKjiG2aZzjSvL4mu3BijwjsuGADKfP6CmB6S_9TeI']", "event_id=$lCpKjiG2aZzjSvL4mu3BijwjsuGADKfP6CmB6S_9TeI,depth=10,body=Historical 97 (batch=0),prevs=['$2e-t0QWqLfVJz2fdwbwAuT7HpT2awCyFhtR35VPUx0Q']", "event_id=$2e-t0QWqLfVJz2fdwbwAuT7HpT2awCyFhtR35VPUx0Q,depth=10,body=Historical 96 (batch=0),prevs=['$3tMaM3-QgXlRaGWzkcKg8VOooUETwADMmvC8LgPHi3k']", "event_id=$3tMaM3-QgXlRaGWzkcKg8VOooUETwADMmvC8LgPHi3k,depth=10,body=Historical 95 (batch=0),prevs=['$sWNiiKCpV6jaMOBrJPKSnmhfm1HvzQX5reT9cNGTgkk']", "event_id=$sWNiiKCpV6jaMOBrJPKSnmhfm1HvzQX5reT9cNGTgkk,depth=10,body=Historical 94 (batch=0),prevs=['$b23cNluaxei15QPBanvua5F690RJdnL4yCq-3MAbKyc']", "event_id=$b23cNluaxei15QPBanvua5F690RJdnL4yCq-3MAbKyc,depth=10,body=Historical 93 (batch=0),prevs=['$mlWvqR2MWITXVBZfeVLcujmVAzBvkkDdP6oSz3eSwUg']", "event_id=$mlWvqR2MWITXVBZfeVLcujmVAzBvkkDdP6oSz3eSwUg,depth=10,body=Historical 92 (batch=0),prevs=['$wlVdFpoXlPWoajb33gizlhlft5k-tVwGUpmMsmcoHPU']", "event_id=$wlVdFpoXlPWoajb33gizlhlft5k-tVwGUpmMsmcoHPU,depth=10,body=Historical 91 (batch=0),prevs=['$v6VMFCnyQkl16tZCVu7J_-Nst_NRXXaQd2efn2TUpEE']", "event_id=$v6VMFCnyQkl16tZCVu7J_-Nst_NRXXaQd2efn2TUpEE,depth=10,body=Historical 90 (batch=0),prevs=['$ooH_Jq5JLVIkIB9TpsltyHtqtn7KNxDIc_xYUcBUsAA']", "event_id=$ooH_Jq5JLVIkIB9TpsltyHtqtn7KNxDIc_xYUcBUsAA,depth=10,body=Historical 89 (batch=0),prevs=['$iK_6DSB9OgXzFNQ1qEZo_42ifWrX1xwUKs_qFKZT8Uo']", "event_id=$iK_6DSB9OgXzFNQ1qEZo_42ifWrX1xwUKs_qFKZT8Uo,depth=10,body=Historical 88 (batch=0),prevs=['$LKnAV4et1stXQRkd_dg0XOK5kNXFyFYQOgx5wdSQB34']", "event_id=$LKnAV4et1stXQRkd_dg0XOK5kNXFyFYQOgx5wdSQB34,depth=10,body=Historical 87 (batch=0),prevs=['$vvtLzYBovfNjm-aU5o5JF6TumBAzrBbzrMVrqA43cmE']", "event_id=$vvtLzYBovfNjm-aU5o5JF6TumBAzrBbzrMVrqA43cmE,depth=10,body=Historical 86 (batch=0),prevs=['$yYyBW-D_3XN6aYyXeRJKQYN1hJyswh9kUe2Q3kk_auA']", "event_id=$yYyBW-D_3XN6aYyXeRJKQYN1hJyswh9kUe2Q3kk_auA,depth=10,body=Historical 85 
(batch=0),prevs=['$_1yK-Gzkabr_ZKMRFHlJMat943MjCznY2YN2WPExrtY']", "event_id=$_1yK-Gzkabr_ZKMRFHlJMat943MjCznY2YN2WPExrtY,depth=10,body=Historical 84 (batch=0),prevs=['$CZSLI1NUX9gWptZT_i52oyCfQpH5rOlPFSgmSjD4a-Q']", "event_id=$CZSLI1NUX9gWptZT_i52oyCfQpH5rOlPFSgmSjD4a-Q,depth=10,body=Historical 83 (batch=0),prevs=['$093Wua4ZaJLX4uRYuL89y3t47jntwceo0ReVGM-_WNE']", "event_id=$093Wua4ZaJLX4uRYuL89y3t47jntwceo0ReVGM-_WNE,depth=10,body=Historical 82 (batch=0),prevs=['$wA7OKTWe6BMbXustd4WBLTJ2fBIY4RLeOstoKaaiIwg']", "event_id=$wA7OKTWe6BMbXustd4WBLTJ2fBIY4RLeOstoKaaiIwg,depth=10,body=Historical 81 (batch=0),prevs=['$ZUVoXb9Or3HAeZVM5nKYUp1-mcizUyGuQHuUaRTEf8s']", "event_id=$ZUVoXb9Or3HAeZVM5nKYUp1-mcizUyGuQHuUaRTEf8s,depth=10,body=Historical 80 (batch=0),prevs=['$ul_NFiXrTv6S-YseztgF5fpKx-_gXeQ7C9XF_TdcCqM']", "event_id=$ul_NFiXrTv6S-YseztgF5fpKx-_gXeQ7C9XF_TdcCqM,depth=10,body=Historical 79 (batch=0),prevs=['$9zzm0Mr6wn2W5EKl7t0AmGqJntslGV9Telo6rP9DeZg']", "event_id=$9zzm0Mr6wn2W5EKl7t0AmGqJntslGV9Telo6rP9DeZg,depth=10,body=Historical 78 (batch=0),prevs=['$LsfJ_IpS4y1mWjdLFsod16JBS2B2j0XO3BCMEtXaigk']", "event_id=$LsfJ_IpS4y1mWjdLFsod16JBS2B2j0XO3BCMEtXaigk,depth=10,body=Historical 77 (batch=0),prevs=['$hfdqS5ARU7roPsWgGS8RXsog_m0P7vOc1UMdmKju0aw']", "event_id=$hfdqS5ARU7roPsWgGS8RXsog_m0P7vOc1UMdmKju0aw,depth=10,body=Historical 76 (batch=0),prevs=['$h4ZA91X45b8aNuMjz_xC1457mw2h6_MTSqvv6MVH8tU']", "event_id=$h4ZA91X45b8aNuMjz_xC1457mw2h6_MTSqvv6MVH8tU,depth=10,body=Historical 75 (batch=0),prevs=['$qvxKJlvqC0gaRP7mRjO-yMD4Zzt42dkd7dqZSs-v-Fk']", "event_id=$qvxKJlvqC0gaRP7mRjO-yMD4Zzt42dkd7dqZSs-v-Fk,depth=10,body=Historical 74 (batch=0),prevs=['$kgMcrid7-aqUIzDzjouB3HQKIGh2h-6Iop6WK08KCkc']", "event_id=$kgMcrid7-aqUIzDzjouB3HQKIGh2h-6Iop6WK08KCkc,depth=10,body=Historical 73 (batch=0),prevs=['$_ssYYwoWm7OOjhYwkZxQKOMq7maygenjjLJBypuA3N8']", "event_id=$_ssYYwoWm7OOjhYwkZxQKOMq7maygenjjLJBypuA3N8,depth=10,body=Historical 72 
(batch=0),prevs=['$1OAjmvRdC67suR_wlrgbQDzOFJZxAnE4RZyKt4Khq9M']", "event_id=$1OAjmvRdC67suR_wlrgbQDzOFJZxAnE4RZyKt4Khq9M,depth=10,body=Historical 71 (batch=0),prevs=['$1cpjOXZwqsZzZ643qAkcnEc3Mjgl1n_WPfeVaovxE0g']", "event_id=$1cpjOXZwqsZzZ643qAkcnEc3Mjgl1n_WPfeVaovxE0g,depth=10,body=Historical 70 (batch=0),prevs=['$qYsfdlkVwJDtvjGHQtJATjglCIUzj11Cu7Kt70Qpa00']", "event_id=$qYsfdlkVwJDtvjGHQtJATjglCIUzj11Cu7Kt70Qpa00,depth=10,body=Historical 69 (batch=0),prevs=['$xwkLzZQX2w1rS9IbSburCCNy9g_3GA5cxR2Dd1AiDEo']", "event_id=$xwkLzZQX2w1rS9IbSburCCNy9g_3GA5cxR2Dd1AiDEo,depth=10,body=Historical 68 (batch=0),prevs=['$RUjVghSfYx0Rz-HMW7pFUjVL7gjnDh5FhrzmizwRSg4']", "event_id=$RUjVghSfYx0Rz-HMW7pFUjVL7gjnDh5FhrzmizwRSg4,depth=10,body=Historical 67 (batch=0),prevs=['$xFM3GD0jHQ2sV_w-m996lYIj6bBy3e4zoo4zba7KK94']", "event_id=$xFM3GD0jHQ2sV_w-m996lYIj6bBy3e4zoo4zba7KK94,depth=10,body=Historical 66 (batch=0),prevs=['$P9rG_KsdrP29_nrUhVNwDXxEj6TtcAHYMiEPjTECuyI']", "event_id=$P9rG_KsdrP29_nrUhVNwDXxEj6TtcAHYMiEPjTECuyI,depth=10,body=Historical 65 (batch=0),prevs=['$fG9vXCIp3ymBUSQGfxuQyHjXvwfaz0i-Al5PZER5q6Y']", "event_id=$fG9vXCIp3ymBUSQGfxuQyHjXvwfaz0i-Al5PZER5q6Y,depth=10,body=Historical 64 (batch=0),prevs=['$ZppkvSCfh7zKOSOIaAzzOM13xzO3c46HlvAt1ogqxSI']", "event_id=$ZppkvSCfh7zKOSOIaAzzOM13xzO3c46HlvAt1ogqxSI,depth=10,body=Historical 63 (batch=0),prevs=['$XJgjPkRjlTh04CAXr_7doyRn9uP6Q2BRBS5ogIxxgq4']", "event_id=$XJgjPkRjlTh04CAXr_7doyRn9uP6Q2BRBS5ogIxxgq4,depth=10,body=Historical 62 (batch=0),prevs=['$iiC9E-Xbw8DqYLkca_3SYXKP0fyjAO3Rulzl_UMlg_U']", "event_id=$iiC9E-Xbw8DqYLkca_3SYXKP0fyjAO3Rulzl_UMlg_U,depth=10,body=Historical 61 (batch=0),prevs=['$JQrY-1YX4u1WLM8pf-YV45wOig2GZmXyLRzQd16lNRY']", "event_id=$JQrY-1YX4u1WLM8pf-YV45wOig2GZmXyLRzQd16lNRY,depth=10,body=Historical 60 (batch=0),prevs=['$EZwD8zE9aSg6UYGfT93gAB22vf1EbG2m4rXNWYtDoMM']", "event_id=$EZwD8zE9aSg6UYGfT93gAB22vf1EbG2m4rXNWYtDoMM,depth=10,body=Historical 59 
(batch=0),prevs=['$MXdmxSkN7gpzy28Rg6Ond0DyVu4a6_NA2PAEh2RGhWE']", "event_id=$MXdmxSkN7gpzy28Rg6Ond0DyVu4a6_NA2PAEh2RGhWE,depth=10,body=Historical 58 (batch=0),prevs=['$3qKPpnyChlt2O24cqwF2zHORIvm4zbVwugRtKxkAQAo']", "event_id=$3qKPpnyChlt2O24cqwF2zHORIvm4zbVwugRtKxkAQAo,depth=10,body=Historical 57 (batch=0),prevs=['$fZwZ3KIY2HpkBh_-GX762uJlvSDjKx_jkKBmO4XMXbU']", "event_id=$fZwZ3KIY2HpkBh_-GX762uJlvSDjKx_jkKBmO4XMXbU,depth=10,body=Historical 56 (batch=0),prevs=['$6S5Ni1dMs9cIWmlGAToAXq9HD19IC3VzPsA6Mpv-99k']", "event_id=$6S5Ni1dMs9cIWmlGAToAXq9HD19IC3VzPsA6Mpv-99k,depth=10,body=Historical 55 (batch=0),prevs=['$H_iqXiAxOxF6cEFG8uBIOA4daNZRS-n5BkyIO2TJB1M']", "event_id=$H_iqXiAxOxF6cEFG8uBIOA4daNZRS-n5BkyIO2TJB1M,depth=10,body=Historical 54 (batch=0),prevs=['$M_jaKYkmwpIOh5WbYKo7OABADOIde2GL_UpYgy5DKJE']", "event_id=$M_jaKYkmwpIOh5WbYKo7OABADOIde2GL_UpYgy5DKJE,depth=10,body=Historical 53 (batch=0),prevs=['$_tY_1OYdYB2e4mX0psdoZXQqbr_nJvdxNErra3vIRHw']", "event_id=$_tY_1OYdYB2e4mX0psdoZXQqbr_nJvdxNErra3vIRHw,depth=10,body=Historical 52 (batch=0),prevs=['$8x0ZzHjqduS1SlqKk1F6x1G71bKCFPWbx0nYU90GS8g']", "event_id=$8x0ZzHjqduS1SlqKk1F6x1G71bKCFPWbx0nYU90GS8g,depth=10,body=Historical 51 (batch=0),prevs=['$hYNHRnZ2AktMkxjbiPP_yofAOMmRJLy4nN5K-81-yj8']", "event_id=$hYNHRnZ2AktMkxjbiPP_yofAOMmRJLy4nN5K-81-yj8,depth=10,body=Historical 50 (batch=0),prevs=['$Aje2Qa0CHNWiAw1HiK-1djoycAhh4UliroN1vAdKIU4']", "event_id=$Aje2Qa0CHNWiAw1HiK-1djoycAhh4UliroN1vAdKIU4,depth=10,body=Historical 49 (batch=0),prevs=['$hbkSA7qbBPpEQH77YE2h-XKVgS3qhI3xse3Yg08smuc']", "event_id=$hbkSA7qbBPpEQH77YE2h-XKVgS3qhI3xse3Yg08smuc,depth=10,body=Historical 48 (batch=0),prevs=['$N80_1es0l8LAcvslhrbTOddyQ76W3_VLPnXnG5IBEn4']", "event_id=$N80_1es0l8LAcvslhrbTOddyQ76W3_VLPnXnG5IBEn4,depth=10,body=Historical 47 (batch=0),prevs=['$rvz1DpQx9HZLbHgZj5Jq3TVoH1WComOP6AWbytDKClQ']", "event_id=$rvz1DpQx9HZLbHgZj5Jq3TVoH1WComOP6AWbytDKClQ,depth=10,body=Historical 46 
(batch=0),prevs=['$a6ifNO0AizTsP4HnVPD3Zkuqvo-8PnarenXKDUJZvGE']", "event_id=$a6ifNO0AizTsP4HnVPD3Zkuqvo-8PnarenXKDUJZvGE,depth=10,body=Historical 45 (batch=0),prevs=['$1WL7whQmdncf8CJ5l38dn_J8PVztPTNNMNS82gltKhw']", "event_id=$1WL7whQmdncf8CJ5l38dn_J8PVztPTNNMNS82gltKhw,depth=10,body=Historical 44 (batch=0),prevs=['$yCLNJyVjv2ti71ORpJI1wuyiWWOCG857If5M24eK2c4']", "event_id=$yCLNJyVjv2ti71ORpJI1wuyiWWOCG857If5M24eK2c4,depth=10,body=Historical 43 (batch=0),prevs=['$LV_gqx1bl9NHYdifp_v5JA1QE79nwblPe9dY_MKWuFo']", "event_id=$LV_gqx1bl9NHYdifp_v5JA1QE79nwblPe9dY_MKWuFo,depth=10,body=Historical 42 (batch=0),prevs=['$TT41-KXnRahtw3bBlgFkaLEvnGtC7BPispoy9LVoSTE']", "event_id=$TT41-KXnRahtw3bBlgFkaLEvnGtC7BPispoy9LVoSTE,depth=10,body=Historical 41 (batch=0),prevs=['$X0wtHayfehEhB59jCSI2NSX5LSdgQfMYBy5D5IXerrg']", "event_id=$X0wtHayfehEhB59jCSI2NSX5LSdgQfMYBy5D5IXerrg,depth=10,body=Historical 40 (batch=0),prevs=['$ERk1O6FQFNndKxopemlDrzWV_SZ9zvPaK-7cDWc9hOo']", "event_id=$ERk1O6FQFNndKxopemlDrzWV_SZ9zvPaK-7cDWc9hOo,depth=10,body=Historical 39 (batch=0),prevs=['$8EltMIaFXj0HYOUsfGwDXs6--yrfI5NvQHbv8okNMbI']", "event_id=$8EltMIaFXj0HYOUsfGwDXs6--yrfI5NvQHbv8okNMbI,depth=10,body=Historical 38 (batch=0),prevs=['$8S8FjedOebUYwPFtb7vb-ZzMeYDp7vJRk-OUzau1RlY']", "event_id=$8S8FjedOebUYwPFtb7vb-ZzMeYDp7vJRk-OUzau1RlY,depth=10,body=Historical 37 (batch=0),prevs=['$s06MJYSVrGYW73z6hpTeCPDxnuFLkJXWvyym-4RqTXg']", "event_id=$s06MJYSVrGYW73z6hpTeCPDxnuFLkJXWvyym-4RqTXg,depth=10,body=Historical 36 (batch=0),prevs=['$fXsHFdVTchw-xmD41v9KC2ZxC5j6-VYRHGdRTw4AxRQ']", "event_id=$fXsHFdVTchw-xmD41v9KC2ZxC5j6-VYRHGdRTw4AxRQ,depth=10,body=Historical 35 (batch=0),prevs=['$ETgS3ZdUq57PzCK0ki-oV21XqFBs1nSgv6CHVl1nddw']", "event_id=$ETgS3ZdUq57PzCK0ki-oV21XqFBs1nSgv6CHVl1nddw,depth=10,body=Historical 34 (batch=0),prevs=['$GyrVy70TxEH3ARdy1tf-ZuxyWTqY8M4GeD-e0O4bXTs']", "event_id=$GyrVy70TxEH3ARdy1tf-ZuxyWTqY8M4GeD-e0O4bXTs,depth=10,body=Historical 33 
(batch=0),prevs=['$etOVbOEtwqS-iLZfOWwAWeaf7Nq4h6B0aLTcsL3Fqb4']", "event_id=$etOVbOEtwqS-iLZfOWwAWeaf7Nq4h6B0aLTcsL3Fqb4,depth=10,body=Historical 32 (batch=0),prevs=['$jkugW9wPO5-ZTu3pulbPyYUs7MT08q24rSit47W2aiE']", "event_id=$jkugW9wPO5-ZTu3pulbPyYUs7MT08q24rSit47W2aiE,depth=10,body=Historical 31 (batch=0),prevs=['$xl5SAerMsKP7xYH7e9B0QyjNGVu1zYN037WcMl6eM7I']", "event_id=$xl5SAerMsKP7xYH7e9B0QyjNGVu1zYN037WcMl6eM7I,depth=10,body=Historical 30 (batch=0),prevs=['$epCIEu5hsIrjVfAQ8xwG_w7l2L_GVswFGlOgvKuBOv4']", "event_id=$epCIEu5hsIrjVfAQ8xwG_w7l2L_GVswFGlOgvKuBOv4,depth=10,body=Historical 29 (batch=0),prevs=['$W-r7mxy0Yaj7M33JPu3WwCGHZ4zy-r_UsuuSiSbaBNI']", "event_id=$W-r7mxy0Yaj7M33JPu3WwCGHZ4zy-r_UsuuSiSbaBNI,depth=10,body=Historical 28 (batch=0),prevs=['$j4NztVmkOMiN4_drb1Hm0EOEh5kDM1r7Vo1jKPlz5iY']", "event_id=$j4NztVmkOMiN4_drb1Hm0EOEh5kDM1r7Vo1jKPlz5iY,depth=10,body=Historical 27 (batch=0),prevs=['$QhtQWlhor0qS81fOCmF2UmYSZXrCY2YabQ3Ysk1pvQ8']", "event_id=$QhtQWlhor0qS81fOCmF2UmYSZXrCY2YabQ3Ysk1pvQ8,depth=10,body=Historical 26 (batch=0),prevs=['$ICyGlEHUQ2VNNkGL8KfN5v9rbuLZzYQ_LmH-XpKXi-w']", "event_id=$ICyGlEHUQ2VNNkGL8KfN5v9rbuLZzYQ_LmH-XpKXi-w,depth=10,body=Historical 25 (batch=0),prevs=['$4WvejXMuOtnrR_npONQHcEptKDb7oeAdzlguCFafXw8']", "event_id=$4WvejXMuOtnrR_npONQHcEptKDb7oeAdzlguCFafXw8,depth=10,body=Historical 24 (batch=0),prevs=['$1dQvQ9CXC8jvDir8Qv_qWD1P6669n5Dt2e9MQzWMmus']", "event_id=$1dQvQ9CXC8jvDir8Qv_qWD1P6669n5Dt2e9MQzWMmus,depth=10,body=Historical 23 (batch=0),prevs=['$VqYi4klnvQoQapuwcRhVOGpnd8b5JZbhpr0J8bIVq6s']", "event_id=$VqYi4klnvQoQapuwcRhVOGpnd8b5JZbhpr0J8bIVq6s,depth=10,body=Historical 22 (batch=0),prevs=['$caikDgtNzU_-uPQvliFV0LaXeD5KxfT42bbYT-tBuz4']", "event_id=$caikDgtNzU_-uPQvliFV0LaXeD5KxfT42bbYT-tBuz4,depth=10,body=Historical 21 (batch=0),prevs=['$tAmrVFwIE67VdJdTELUy9dCLX7UnwEQWJQ0szLMLj1I']", "event_id=$tAmrVFwIE67VdJdTELUy9dCLX7UnwEQWJQ0szLMLj1I,depth=10,body=Historical 20 
(batch=0),prevs=['$moO383nMlO3xQcIcBJRa1ob23aKG_3BboL0VzaEuV9M']", "event_id=$moO383nMlO3xQcIcBJRa1ob23aKG_3BboL0VzaEuV9M,depth=10,body=Historical 19 (batch=0),prevs=['$1ZC20pJmFEnjdsp1WeF3Vb786YGxRUN7lVgBuGtbTh0']", "event_id=$1ZC20pJmFEnjdsp1WeF3Vb786YGxRUN7lVgBuGtbTh0,depth=10,body=Historical 18 (batch=0),prevs=['$CR5eEOqcjN4M-V4z4pmcGuj4DDZvIXsXJuCGKFCL2bo']", "event_id=$CR5eEOqcjN4M-V4z4pmcGuj4DDZvIXsXJuCGKFCL2bo,depth=10,body=Historical 17 (batch=0),prevs=['$5Nt4QouUaxCemiakDvgSau0Awaq2eLiuf_zAotPHrEk']", "event_id=$5Nt4QouUaxCemiakDvgSau0Awaq2eLiuf_zAotPHrEk,depth=10,body=Historical 16 (batch=0),prevs=['$bQGGb7wF69JaRvAgcy8YXA--vZxVwIFoQwBxz1JNcc0']", "event_id=$bQGGb7wF69JaRvAgcy8YXA--vZxVwIFoQwBxz1JNcc0,depth=10,body=Historical 15 (batch=0),prevs=['$5NTrpJdUTplsV5Pv0h81f10Od7r8b7MByhNP-Hgo5XI']", "event_id=$5NTrpJdUTplsV5Pv0h81f10Od7r8b7MByhNP-Hgo5XI,depth=10,body=Historical 14 (batch=0),prevs=['$RlE8jSspaoHviQ64oGYupPalY6iiEOfm6SVKxCFcj5Q']", "event_id=$RlE8jSspaoHviQ64oGYupPalY6iiEOfm6SVKxCFcj5Q,depth=10,body=Historical 13 (batch=0),prevs=['$fAjxrhPzB4YdRFSVUrY4dyUX2rs_Og61PlIxutf4cPw']", "event_id=$fAjxrhPzB4YdRFSVUrY4dyUX2rs_Og61PlIxutf4cPw,depth=10,body=Historical 12 (batch=0),prevs=['$AiIInms6FQzl0GHYZw38stEFC72Bl9uoW5r7sLBOdiw']", "event_id=$AiIInms6FQzl0GHYZw38stEFC72Bl9uoW5r7sLBOdiw,depth=10,body=Historical 11 (batch=0),prevs=['$l5uuVWnK1HYmBpL-X8XGLG-0FzXmrAVDAdFojpD_BnY']", "event_id=$l5uuVWnK1HYmBpL-X8XGLG-0FzXmrAVDAdFojpD_BnY,depth=10,body=Historical 10 (batch=0),prevs=['$bq2Cmy1rMLmJRWRek0azb5SE-CBPj-QJ1GB2F6X6pos']", "event_id=$bq2Cmy1rMLmJRWRek0azb5SE-CBPj-QJ1GB2F6X6pos,depth=10,body=Historical 9 (batch=0),prevs=['$fNL5PyiTg-FuQaq3rBdqAzVLev7MPo89u5nUGdP5CdY']", "event_id=$fNL5PyiTg-FuQaq3rBdqAzVLev7MPo89u5nUGdP5CdY,depth=10,body=Historical 8 (batch=0),prevs=['$FZvdy85_rIB-TgSq2irIGp8TPz3vt3DdEsTlmklxHfE']", "event_id=$FZvdy85_rIB-TgSq2irIGp8TPz3vt3DdEsTlmklxHfE,depth=10,body=Historical 7 
(batch=0),prevs=['$gibUQn3wnIHPf2CzXr4rVaDd0ozucmpZLTXi69kT4v0']", "event_id=$gibUQn3wnIHPf2CzXr4rVaDd0ozucmpZLTXi69kT4v0,depth=10,body=Historical 6 (batch=0),prevs=['$EtLYRN3xJoYCG2f6l3ZNzeFDBVgT0sJSO-5_CWfk62E']", "event_id=$G00qI6L-Jem6u0rM73WAgtl3e27WFREG_IqXBK3rBVY,depth=9,body=Message 1 (eventIDsBefore),prevs=['$E9W0RVZ5jf054fUIQPlls4AXPlFcS5sF6iSwdsgpEZ0']", "event_id=$EtLYRN3xJoYCG2f6l3ZNzeFDBVgT0sJSO-5_CWfk62E,depth=10,body=Historical 5 (batch=0),prevs=['$6f7SVRgiDAaiqWanvIsjBfJieMdK8fP_t8qalpWDTKo']" ] ``` When only connecting batch -> insertion event: - batch -> insertion fake edge ``` backfill sorted_events=[ "event_id=$vDdCr0f90ey6Pjpw8Ugl2HQf09Rww5x_M8LanATK8VM,depth=10,body=org.matrix.msc2716.insertion,prevs=['$n9kq5qDGfNxKw7_Cb8kIC-9BrQ0Kg_sTWn9_nLwovA4']", "event_id=$h9x6BgpQVHF8Jr33ovBjTxDvXYuR6aNyxw2sKWKqu5U,depth=10,body=org.matrix.msc2716.batch,prevs=['$KU7hS3FmvoQaZkOjdWqyuBqBwFvcWdqJEbKvsXAxWgg']", "event_id=$KU7hS3FmvoQaZkOjdWqyuBqBwFvcWdqJEbKvsXAxWgg,depth=10,body=Historical 99 (batch=0),prevs=['$N0bxYWNt4qrh3B4ns78nzD_BRSFzMHt1KTynmuIjztQ']", "event_id=$N0bxYWNt4qrh3B4ns78nzD_BRSFzMHt1KTynmuIjztQ,depth=10,body=Historical 98 (batch=0),prevs=['$D9gUbM3vrg7KAVDrXowcP0jmzjX_VM2OHArxGZy6wc8']", "event_id=$D9gUbM3vrg7KAVDrXowcP0jmzjX_VM2OHArxGZy6wc8,depth=10,body=Historical 97 (batch=0),prevs=['$fUs553QBZQ-3MqELA3X6pmCL6F9HsEGc0eAyIQ5w-KQ']", "event_id=$fUs553QBZQ-3MqELA3X6pmCL6F9HsEGc0eAyIQ5w-KQ,depth=10,body=Historical 96 (batch=0),prevs=['$Txz5IQUjGCf1ewtLvxmPKS0MrGD2LYoh6JwoQcXPYYI']", "event_id=$Txz5IQUjGCf1ewtLvxmPKS0MrGD2LYoh6JwoQcXPYYI,depth=10,body=Historical 95 (batch=0),prevs=['$kC7jJEVaeGxRbaGAECrje-j_rUWodeKtXT5TByOR3_0']", "event_id=$kC7jJEVaeGxRbaGAECrje-j_rUWodeKtXT5TByOR3_0,depth=10,body=Historical 94 (batch=0),prevs=['$qQ_Qw4R3XJ65ftHFvt7WyUg1-TrJ5ubgw4ln93O3qSE']", "event_id=$qQ_Qw4R3XJ65ftHFvt7WyUg1-TrJ5ubgw4ln93O3qSE,depth=10,body=Historical 93 (batch=0),prevs=['$z6Sohu4b39SVOiprB26Ke9XNo1EWuzZysQ-jyc80oXs']", 
"event_id=$z6Sohu4b39SVOiprB26Ke9XNo1EWuzZysQ-jyc80oXs,depth=10,body=Historical 92 (batch=0),prevs=['$CP4JNyEaRikkHIC-1OpoXhw3y3s_0_s-rqnKu_63aFg']", "event_id=$CP4JNyEaRikkHIC-1OpoXhw3y3s_0_s-rqnKu_63aFg,depth=10,body=Historical 91 (batch=0),prevs=['$nCjxeXj_OF05JtxM8YUdUlx_moibUUy6QrosWt8R_Us']", "event_id=$nCjxeXj_OF05JtxM8YUdUlx_moibUUy6QrosWt8R_Us,depth=10,body=Historical 90 (batch=0),prevs=['$8sFfCVGCb97FXd1etQLR70vmJDjk6RNsgbo5ughLXl8']", "event_id=$8sFfCVGCb97FXd1etQLR70vmJDjk6RNsgbo5ughLXl8,depth=10,body=Historical 89 (batch=0),prevs=['$45AU3xVc6xF4qfrdJMh68kZIc-PF9hqkFmuOoxA_j08']", "event_id=$45AU3xVc6xF4qfrdJMh68kZIc-PF9hqkFmuOoxA_j08,depth=10,body=Historical 88 (batch=0),prevs=['$9JhvBwk32ygq52H9mKPryUxX3ildrK47p8bAECnMkcI']", "event_id=$9JhvBwk32ygq52H9mKPryUxX3ildrK47p8bAECnMkcI,depth=10,body=Historical 87 (batch=0),prevs=['$rXBGwIxCzRUCCQpNvMNiePqrKk6daAOb5nN2AuACDcg']", "event_id=$rXBGwIxCzRUCCQpNvMNiePqrKk6daAOb5nN2AuACDcg,depth=10,body=Historical 86 (batch=0),prevs=['$Iebf12p8JQRuVHfWIgVwyc8cqDkPwTvod2HJIAfaaXY']", "event_id=$Iebf12p8JQRuVHfWIgVwyc8cqDkPwTvod2HJIAfaaXY,depth=10,body=Historical 85 (batch=0),prevs=['$8CV6t9Yg5RrYzy1-t72ez47xVAgc45iM_nBcEr0r5u8']", "event_id=$8CV6t9Yg5RrYzy1-t72ez47xVAgc45iM_nBcEr0r5u8,depth=10,body=Historical 84 (batch=0),prevs=['$jF0yFS-VkCntnui9esPytzncMQ4vbpoI4dEtbnqm2zM']", "event_id=$jF0yFS-VkCntnui9esPytzncMQ4vbpoI4dEtbnqm2zM,depth=10,body=Historical 83 (batch=0),prevs=['$rEfXGwUAl6BfF0t_PtOoqnyiC02ACeOKB9zrWaB2k9w']", "event_id=$rEfXGwUAl6BfF0t_PtOoqnyiC02ACeOKB9zrWaB2k9w,depth=10,body=Historical 82 (batch=0),prevs=['$7sjkdcEQOMDA7bXvNadBsscUPsvgwHgF7lHq6qT5SCQ']", "event_id=$7sjkdcEQOMDA7bXvNadBsscUPsvgwHgF7lHq6qT5SCQ,depth=10,body=Historical 81 (batch=0),prevs=['$Ee4Zn_bD2_qgutvjVpmTLduivzstgTmaXoG4QYsk9kI']", "event_id=$Ee4Zn_bD2_qgutvjVpmTLduivzstgTmaXoG4QYsk9kI,depth=10,body=Historical 80 (batch=0),prevs=['$O1NfL_xO7qhf9XtZfwYPQJHRTgcP9UJZ_CU6UfnyTmI']", 
"event_id=$O1NfL_xO7qhf9XtZfwYPQJHRTgcP9UJZ_CU6UfnyTmI,depth=10,body=Historical 79 (batch=0),prevs=['$lSviYdCQKQye2QgoDaj7Ax1uKSp55bvUlS_Ax7vocPA']", "event_id=$lSviYdCQKQye2QgoDaj7Ax1uKSp55bvUlS_Ax7vocPA,depth=10,body=Historical 78 (batch=0),prevs=['$ES3Mz4RnN5yj2mv6HIloRnAQlryyAToscQP_74X0R3E']", "event_id=$ES3Mz4RnN5yj2mv6HIloRnAQlryyAToscQP_74X0R3E,depth=10,body=Historical 77 (batch=0),prevs=['$G6S-j3AT0tMMHoTXDRYJYGHmVSpy3I7qMPJp71PvqsE']", "event_id=$G6S-j3AT0tMMHoTXDRYJYGHmVSpy3I7qMPJp71PvqsE,depth=10,body=Historical 76 (batch=0),prevs=['$n2IZJ9fG-t9cVol8slIDErGnNdbPeacvAWCDlTGYpJk']", "event_id=$n2IZJ9fG-t9cVol8slIDErGnNdbPeacvAWCDlTGYpJk,depth=10,body=Historical 75 (batch=0),prevs=['$WxYHgWQRH2CKo5fXTiwH5zMrc7xuI3nsiBgDDSFFBbY']", "event_id=$WxYHgWQRH2CKo5fXTiwH5zMrc7xuI3nsiBgDDSFFBbY,depth=10,body=Historical 74 (batch=0),prevs=['$4SxomALNe4fQrzh990LW2MVe2hAh41hefUN96WklLwA']", "event_id=$4SxomALNe4fQrzh990LW2MVe2hAh41hefUN96WklLwA,depth=10,body=Historical 73 (batch=0),prevs=['$7jcugB1rv_Jp4wntQhdW_LrwAHjV4flw_uZ3A0rrKVY']", "event_id=$7jcugB1rv_Jp4wntQhdW_LrwAHjV4flw_uZ3A0rrKVY,depth=10,body=Historical 72 (batch=0),prevs=['$0JOxrvf7ZArMCYzB1qSz76-akAdDtf0B-ferG4uI3vo']", "event_id=$0JOxrvf7ZArMCYzB1qSz76-akAdDtf0B-ferG4uI3vo,depth=10,body=Historical 71 (batch=0),prevs=['$Vd0nksOUDQ92s-Kkn37Zhhj204cAUynybHuHrgtROTs']", "event_id=$Vd0nksOUDQ92s-Kkn37Zhhj204cAUynybHuHrgtROTs,depth=10,body=Historical 70 (batch=0),prevs=['$CCDHZGKctXGPB5bAoEbrNk7cXCUiDtIL7PJ1VQGwIS0']", "event_id=$CCDHZGKctXGPB5bAoEbrNk7cXCUiDtIL7PJ1VQGwIS0,depth=10,body=Historical 69 (batch=0),prevs=['$3w0P2g8VdVb1ar52A_9XXeON3vmWb6sr_Lctjcyp11A']", "event_id=$3w0P2g8VdVb1ar52A_9XXeON3vmWb6sr_Lctjcyp11A,depth=10,body=Historical 68 (batch=0),prevs=['$-fRkdzIZbqczQ85DLuNk9l9DlYgtroT3Sj2hY4PjK3Y']", "event_id=$-fRkdzIZbqczQ85DLuNk9l9DlYgtroT3Sj2hY4PjK3Y,depth=10,body=Historical 67 (batch=0),prevs=['$jX0KrPhWbDIHoxsFZCKm8VJLiRaz7zbFLRpcPmXrNLU']", 
"event_id=$jX0KrPhWbDIHoxsFZCKm8VJLiRaz7zbFLRpcPmXrNLU,depth=10,body=Historical 66 (batch=0),prevs=['$RZaiBaIb5pXeFu21n3o8agvfPOzYPHZW2pg-NYnK2Wo']", "event_id=$RZaiBaIb5pXeFu21n3o8agvfPOzYPHZW2pg-NYnK2Wo,depth=10,body=Historical 65 (batch=0),prevs=['$Ayy7g5q4SVDDI2n4AjMKEhF7JKYekYoezs42uVlqKQo']", "event_id=$Ayy7g5q4SVDDI2n4AjMKEhF7JKYekYoezs42uVlqKQo,depth=10,body=Historical 64 (batch=0),prevs=['$Qrtts5rHLUrB3SH__uKCz9M_IzT8dwLEi5cZbL5XSrI']", "event_id=$Qrtts5rHLUrB3SH__uKCz9M_IzT8dwLEi5cZbL5XSrI,depth=10,body=Historical 63 (batch=0),prevs=['$GzpO60z90GafGhNpczoL9-_9sA4X9KAIx-oOp3Ak5fs']", "event_id=$GzpO60z90GafGhNpczoL9-_9sA4X9KAIx-oOp3Ak5fs,depth=10,body=Historical 62 (batch=0),prevs=['$FoDpnYsHjsEPKpk1aBTznOupi2Nq0pDcwgPup3EuwHY']", "event_id=$FoDpnYsHjsEPKpk1aBTznOupi2Nq0pDcwgPup3EuwHY,depth=10,body=Historical 61 (batch=0),prevs=['$x22YRSOcrA__Jj1jlAjUQj7_aDJxmzgXfn03nDgNuAs']", "event_id=$x22YRSOcrA__Jj1jlAjUQj7_aDJxmzgXfn03nDgNuAs,depth=10,body=Historical 60 (batch=0),prevs=['$3abKTbz8gCJ7WVGCAX4gdKKjctfq2UBwAq_kZAiMb6o']", "event_id=$3abKTbz8gCJ7WVGCAX4gdKKjctfq2UBwAq_kZAiMb6o,depth=10,body=Historical 59 (batch=0),prevs=['$6JgNxTembrNVYP3Ko6VgpHJNILIqIvTXwPpIsds63BA']", "event_id=$6JgNxTembrNVYP3Ko6VgpHJNILIqIvTXwPpIsds63BA,depth=10,body=Historical 58 (batch=0),prevs=['$NUENCAs9SSA4UUHind2mtrX_-5_H-GTXJ4DjQ8o0hVo']", "event_id=$NUENCAs9SSA4UUHind2mtrX_-5_H-GTXJ4DjQ8o0hVo,depth=10,body=Historical 57 (batch=0),prevs=['$c12ynew3nRtCewk7IvWy1EurV2XHgRh1sRbj9x5OwJA']", "event_id=$c12ynew3nRtCewk7IvWy1EurV2XHgRh1sRbj9x5OwJA,depth=10,body=Historical 56 (batch=0),prevs=['$xcqepPdOhubqAJ2xzBBZg0kuoPnfW4DcTJcj0UiRnDU']", "event_id=$xcqepPdOhubqAJ2xzBBZg0kuoPnfW4DcTJcj0UiRnDU,depth=10,body=Historical 55 (batch=0),prevs=['$BPlbbeAqGAzwTsSA-B0qOOYkZyovhvESdtcANOsobdk']", "event_id=$BPlbbeAqGAzwTsSA-B0qOOYkZyovhvESdtcANOsobdk,depth=10,body=Historical 54 (batch=0),prevs=['$Z40RGtNNkb54tu_EjNyCc3TG3JAzDn2VNFKcogEpquo']", 
"event_id=$Z40RGtNNkb54tu_EjNyCc3TG3JAzDn2VNFKcogEpquo,depth=10,body=Historical 53 (batch=0),prevs=['$K-50fqiUNw5NkQeGMkg3fgaJCwA9bm5WtTM7AWmfbvw']", "event_id=$K-50fqiUNw5NkQeGMkg3fgaJCwA9bm5WtTM7AWmfbvw,depth=10,body=Historical 52 (batch=0),prevs=['$71c4Wa_ks9TcN4nwj0403YQGRRVj-6rhJkQlwoIiJf4']", "event_id=$71c4Wa_ks9TcN4nwj0403YQGRRVj-6rhJkQlwoIiJf4,depth=10,body=Historical 51 (batch=0),prevs=['$o16ueMl2wNS6UfeDDt1JJDP5GeHDAo3G-mZnc5kFVA8']", "event_id=$o16ueMl2wNS6UfeDDt1JJDP5GeHDAo3G-mZnc5kFVA8,depth=10,body=Historical 50 (batch=0),prevs=['$Tf7hucPlbAIsJSJC_SZTerIDhA601DVXdktkmuR5Kw8']", "event_id=$Tf7hucPlbAIsJSJC_SZTerIDhA601DVXdktkmuR5Kw8,depth=10,body=Historical 49 (batch=0),prevs=['$sCpJSEbWzgIZaVxLlGb_JkxhYFj2s16ZloJPiafivVs']", "event_id=$sCpJSEbWzgIZaVxLlGb_JkxhYFj2s16ZloJPiafivVs,depth=10,body=Historical 48 (batch=0),prevs=['$UrxtBrZ2WrXFjInPAO2dwVtoPwDQxDDQ49GoqT5v6mI']", "event_id=$UrxtBrZ2WrXFjInPAO2dwVtoPwDQxDDQ49GoqT5v6mI,depth=10,body=Historical 47 (batch=0),prevs=['$7PoOmq5lSm_3qEm2B5ggpuEqev14pFjMD-T_4BYqZYo']", "event_id=$7PoOmq5lSm_3qEm2B5ggpuEqev14pFjMD-T_4BYqZYo,depth=10,body=Historical 46 (batch=0),prevs=['$2DhrL57VVj1KJuWeIP1_UZHrD7VQq2AUksvMDiTPD-4']", "event_id=$2DhrL57VVj1KJuWeIP1_UZHrD7VQq2AUksvMDiTPD-4,depth=10,body=Historical 45 (batch=0),prevs=['$8JexHkPI8s_95LGsa-xS7IUA6pPAzl1Wu6HOkezX4hk']", "event_id=$8JexHkPI8s_95LGsa-xS7IUA6pPAzl1Wu6HOkezX4hk,depth=10,body=Historical 44 (batch=0),prevs=['$D1BSCRk1kps83i5-Tm34uZGwTCCxNlmPt-QrVzJ3gKQ']", "event_id=$D1BSCRk1kps83i5-Tm34uZGwTCCxNlmPt-QrVzJ3gKQ,depth=10,body=Historical 43 (batch=0),prevs=['$2sdel6OjRf9AArfA-Lp-6AxAXMEGvus5j9CupJmPfpk']", "event_id=$2sdel6OjRf9AArfA-Lp-6AxAXMEGvus5j9CupJmPfpk,depth=10,body=Historical 42 (batch=0),prevs=['$ts6IiQMG8YyMwDYVUdRplhzpJLQPuzZXE8TIjTm5Qtw']", "event_id=$ts6IiQMG8YyMwDYVUdRplhzpJLQPuzZXE8TIjTm5Qtw,depth=10,body=Historical 41 (batch=0),prevs=['$SxPDbdJ_VYhfcIkJe4sUCvZG71gnhDjgne-WM-gOncQ']", 
"event_id=$SxPDbdJ_VYhfcIkJe4sUCvZG71gnhDjgne-WM-gOncQ,depth=10,body=Historical 40 (batch=0),prevs=['$AOpiIZHC5mhpwV1RPzQd_29KHsY-niX0uuL3ofWoTyY']", "event_id=$AOpiIZHC5mhpwV1RPzQd_29KHsY-niX0uuL3ofWoTyY,depth=10,body=Historical 39 (batch=0),prevs=['$8vAjOM6qlvKS9JJAXQx4CCOqRSzMP6C9YlNi0ft1Y68']", "event_id=$8vAjOM6qlvKS9JJAXQx4CCOqRSzMP6C9YlNi0ft1Y68,depth=10,body=Historical 38 (batch=0),prevs=['$EUgYbcq2DMNLgzY7SaPtpNQAOKKYLKYIoGfNYQb8Y2A']", "event_id=$EUgYbcq2DMNLgzY7SaPtpNQAOKKYLKYIoGfNYQb8Y2A,depth=10,body=Historical 37 (batch=0),prevs=['$ibThSro6ruPZeNqjf_9UGXfo1kErnQUcRZTDAunDM-4']", "event_id=$ibThSro6ruPZeNqjf_9UGXfo1kErnQUcRZTDAunDM-4,depth=10,body=Historical 36 (batch=0),prevs=['$YcrZi3aSgm17cZUkGZf-MaR8EWzl-RdDf5qYctmnZ1k']", "event_id=$YcrZi3aSgm17cZUkGZf-MaR8EWzl-RdDf5qYctmnZ1k,depth=10,body=Historical 35 (batch=0),prevs=['$0BhcUb0o0wtIE4hwVqAs0AC591TPOx54idyv6TCTiEo']", "event_id=$0BhcUb0o0wtIE4hwVqAs0AC591TPOx54idyv6TCTiEo,depth=10,body=Historical 34 (batch=0),prevs=['$umgo6IT00-E22uIRr_e4AsclGtanloy1Aw5GzNpBNfc']", "event_id=$umgo6IT00-E22uIRr_e4AsclGtanloy1Aw5GzNpBNfc,depth=10,body=Historical 33 (batch=0),prevs=['$aka96B_0Yx9AljoQKr4QitStKb0RkcPucvFKusGdySc']", "event_id=$aka96B_0Yx9AljoQKr4QitStKb0RkcPucvFKusGdySc,depth=10,body=Historical 32 (batch=0),prevs=['$OJJa17fRud2uJLkJ0-mF8iQFsAvimrY_efopy6ctqTg']", "event_id=$OJJa17fRud2uJLkJ0-mF8iQFsAvimrY_efopy6ctqTg,depth=10,body=Historical 31 (batch=0),prevs=['$pLWU_gcJnZGRmruRUcPIqWcjBJ9zmh1lb2jv39RDipc']", "event_id=$pLWU_gcJnZGRmruRUcPIqWcjBJ9zmh1lb2jv39RDipc,depth=10,body=Historical 30 (batch=0),prevs=['$G-kuDTfZY1V23CzLgSXfgdctIvbxmnT0Nx-qUuUhGbw']", "event_id=$G-kuDTfZY1V23CzLgSXfgdctIvbxmnT0Nx-qUuUhGbw,depth=10,body=Historical 29 (batch=0),prevs=['$b5NQ52jYrGKU7-5lJojXF3xAyWcANt3P6CJ1_Mpw9hA']", "event_id=$b5NQ52jYrGKU7-5lJojXF3xAyWcANt3P6CJ1_Mpw9hA,depth=10,body=Historical 28 (batch=0),prevs=['$4oJCt-e8AZYL8WJuY_uV2COvrCzbTvC7RB8xAvxGvEA']", 
"event_id=$4oJCt-e8AZYL8WJuY_uV2COvrCzbTvC7RB8xAvxGvEA,depth=10,body=Historical 27 (batch=0),prevs=['$lP0kCgxZMRAQL6_zg0JLNvdn1lRf1dLWXaWMa4ZT5Iw']", "event_id=$lP0kCgxZMRAQL6_zg0JLNvdn1lRf1dLWXaWMa4ZT5Iw,depth=10,body=Historical 26 (batch=0),prevs=['$b46KcYyRVIQJ2Mf6lc7UC56ViSxV9h43SVeQ5gYQ0MU']", "event_id=$b46KcYyRVIQJ2Mf6lc7UC56ViSxV9h43SVeQ5gYQ0MU,depth=10,body=Historical 25 (batch=0),prevs=['$b35VNurzabrSP0KvVCXRB1i-FjbDS8Ly-UZcU5m9RC0']", "event_id=$b35VNurzabrSP0KvVCXRB1i-FjbDS8Ly-UZcU5m9RC0,depth=10,body=Historical 24 (batch=0),prevs=['$8b1w7cxe5bdMj9mKrZTD32szDIVDZutWo1iXUsEQBSM']", "event_id=$8b1w7cxe5bdMj9mKrZTD32szDIVDZutWo1iXUsEQBSM,depth=10,body=Historical 23 (batch=0),prevs=['$vqlAlL8FDxucbRlZISrTtvtP4G1tSHNzbZwWEzAMbRM']", "event_id=$vqlAlL8FDxucbRlZISrTtvtP4G1tSHNzbZwWEzAMbRM,depth=10,body=Historical 22 (batch=0),prevs=['$ACWiMQWRjseJTFI_4yFLcrr-U7qzeHeSJpOlWOm7H2c']", "event_id=$ACWiMQWRjseJTFI_4yFLcrr-U7qzeHeSJpOlWOm7H2c,depth=10,body=Historical 21 (batch=0),prevs=['$mkKg3O1vyzZXCJoN0xEDuZYcm-mra3neT8peKSaI-fQ']", "event_id=$mkKg3O1vyzZXCJoN0xEDuZYcm-mra3neT8peKSaI-fQ,depth=10,body=Historical 20 (batch=0),prevs=['$Aro7lP7i_onbj9g5-tgTxKR7Xwo2u6H9xMKVb0Q4OQo']", "event_id=$Aro7lP7i_onbj9g5-tgTxKR7Xwo2u6H9xMKVb0Q4OQo,depth=10,body=Historical 19 (batch=0),prevs=['$ZT0davtwy-6ltpLlA7xCRDoZTbBKrFEm0xywwBqmI74']", "event_id=$ZT0davtwy-6ltpLlA7xCRDoZTbBKrFEm0xywwBqmI74,depth=10,body=Historical 18 (batch=0),prevs=['$LYfu_QQSdd9Murplu_qT56yJXtXNF0PIfJ-4-zj_OK4']", "event_id=$LYfu_QQSdd9Murplu_qT56yJXtXNF0PIfJ-4-zj_OK4,depth=10,body=Historical 17 (batch=0),prevs=['$sCjd4Lz7kNCDqKnV4NpyEbxufPa5ygfhMIXD5tvZIVE']", "event_id=$sCjd4Lz7kNCDqKnV4NpyEbxufPa5ygfhMIXD5tvZIVE,depth=10,body=Historical 16 (batch=0),prevs=['$7b8cgWaAtDefi5UR74eFaIq-OCAozn1z-095TfJYYWc']", "event_id=$7b8cgWaAtDefi5UR74eFaIq-OCAozn1z-095TfJYYWc,depth=10,body=Historical 15 (batch=0),prevs=['$DJ4LuxKnd1pjfsvyqbx5lWFucWgHi-ouH1GoS_8MQXs']", 
"event_id=$DJ4LuxKnd1pjfsvyqbx5lWFucWgHi-ouH1GoS_8MQXs,depth=10,body=Historical 14 (batch=0),prevs=['$GeABTSXvlqx9Ul4j4vQHQ0xHiZqKjRvZbXQQjPpBQZA']", "event_id=$GeABTSXvlqx9Ul4j4vQHQ0xHiZqKjRvZbXQQjPpBQZA,depth=10,body=Historical 13 (batch=0),prevs=['$KoqVyBic8-LUcPwfk3cN-xgwPvcHmPL8kUrVCVXl6Q8']", "event_id=$KoqVyBic8-LUcPwfk3cN-xgwPvcHmPL8kUrVCVXl6Q8,depth=10,body=Historical 12 (batch=0),prevs=['$Rn3edaeQ2P7DiyOh5y2_dtPK_sg_97neVHHDXCaga0g']", "event_id=$Rn3edaeQ2P7DiyOh5y2_dtPK_sg_97neVHHDXCaga0g,depth=10,body=Historical 11 (batch=0),prevs=['$rRXAVfkswED2m4zGvzd3E3msFjbN1RjP08DblJb9kbY']", "event_id=$rRXAVfkswED2m4zGvzd3E3msFjbN1RjP08DblJb9kbY,depth=10,body=Historical 10 (batch=0),prevs=['$7NE5L9LaMSqUj2gwxB46vZrCIDKklNRJNEnYhhluC3I']", "event_id=$7NE5L9LaMSqUj2gwxB46vZrCIDKklNRJNEnYhhluC3I,depth=10,body=Historical 9 (batch=0),prevs=['$Legp3Fec3qa5Matct10VzlsNU-cCYSfOMpEC1wWRCSw']", "event_id=$Legp3Fec3qa5Matct10VzlsNU-cCYSfOMpEC1wWRCSw,depth=10,body=Historical 8 (batch=0),prevs=['$Ot4kjqVOhGVkEWIshCwP9AmsHX9Dw_OG_rAfzF2YYgk']", "event_id=$Ot4kjqVOhGVkEWIshCwP9AmsHX9Dw_OG_rAfzF2YYgk,depth=10,body=Historical 7 (batch=0),prevs=['$_NgCYX2d-zby256Pt84Aw4WOtYtH5kaG9Qq5nrSISFo']", "event_id=$_NgCYX2d-zby256Pt84Aw4WOtYtH5kaG9Qq5nrSISFo,depth=10,body=Historical 6 (batch=0),prevs=['$wm5gnTie-UwOxwYVM6YYI6qbOO9g_n-IZ900ouJf4yI']", "event_id=$wm5gnTie-UwOxwYVM6YYI6qbOO9g_n-IZ900ouJf4yI,depth=10,body=Historical 5 (batch=0),prevs=['$d8O1CwNO3PfEYgODGj_6eHdxs-8H0Nk3UaoBnrJ8QBs']", "event_id=$rFNqVLF37MMcg7Dy8DjxnpUIyo8q-GBEs35AHKJRe8c,depth=11,body=Message 1 (eventIDsAfter),prevs=['$-VN0Wx6nnEzZLwFeON19jL9hW07xo1rGS08HsmBtA2g']", "event_id=$-VN0Wx6nnEzZLwFeON19jL9hW07xo1rGS08HsmBtA2g,depth=10,body=Message 0 (eventIDsAfter),prevs=['$n9kq5qDGfNxKw7_Cb8kIC-9BrQ0Kg_sTWn9_nLwovA4']", "event_id=$n9kq5qDGfNxKw7_Cb8kIC-9BrQ0Kg_sTWn9_nLwovA4,depth=9,body=Message 1 (eventIDsBefore),prevs=['$JdciLNIztkJyL8kE0GAs_JNRKx00ZYkfkT0Hw9bZmTY']" ] ``` --- synapse/handlers/federation_event.py | 
119 ++++++++++++++------------- 1 file changed, 64 insertions(+), 55 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 66d3da871900..ab2ed53bce2f 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -648,42 +648,9 @@ async def _get_missing_events_for_pdu( logger.info("Got %d prev_events", len(missing_events)) await self._process_pulled_events(origin, missing_events, backfilled=False) - async def _process_pulled_events( - self, origin: str, events: Iterable[EventBase], backfilled: bool - ) -> None: - """Process a batch of events we have pulled from a remote server - - Pulls in any events required to auth the events, persists the received events, - and notifies clients, if appropriate. - - Assumes the events have already had their signatures and hashes checked. - - Params: - origin: The server we received these events from - events: The received events. - backfilled: True if this is part of a historical batch of events (inhibits - notification to clients, and validation of device keys.) - """ - - logger.info( - "backfill events=%s", - [ - "event_id=%s,depth=%d,body=%s,prevs=%s\n" - % ( - event.event_id, - event.depth, - event.content.get("body", event.type), - event.prev_event_ids(), - ) - for event in events - ], - ) - - # We want to sort these by depth so we process them and - # tell clients about them in order. 
- # sorted_events = sorted(events, key=lambda x: x.depth) - - event_ids = [event.event_id for event in events] + async def generateEventIdGraphFromEvents( + self, events: Iterable[EventBase] + ) -> Dict[str, Iterable[str]]: event_map = {event.event_id: event for event in events} # Since the insertion event we try to reference later on might be in the @@ -697,6 +664,7 @@ async def _process_pulled_events( if event.type == EventTypes.MSC2716_INSERTION } + # Map a given event to its successors (backwards prev_events) successor_event_id_map = {} for event in events: for prev_event_id in event.prev_event_ids(): @@ -727,11 +695,11 @@ async def _process_pulled_events( ) if insertion_event_id: - # Add the insertion event as a fake edge connection to the batch - # event so the historical batch topologically sorts below - # the "live" event we branched off of. - event_id_graph.setdefault(event.event_id, []).append( - insertion_event_id + # Connect the insertion event via a fake edge pointing to the + # batch event so the historical batch topologically sorts + # behind-in-time the insertion event. + event_id_graph.setdefault(insertion_event_id, []).append( + event.event_id ) # Maybe we can get lucky and save ourselves a lookup @@ -743,10 +711,11 @@ async def _process_pulled_events( ) if insertion_event: - # Also add some fake edges to connect the insertion - # event to it's prev_event successors so it sorts - # topologically behind-in-time the successor. Nestled - # perfectly between the prev_event and the successor. + # Connect the insertion events' `prev_event` successors + # via fake edges pointing to the insertion event itself + # so the insertion event sorts topologically + # behind-in-time the successor. Nestled perfectly + # between the prev_event and the successor.
for insertion_prev_event_id in insertion_event.prev_event_ids(): successor_event_ids = successor_event_id_map[ insertion_prev_event_id @@ -757,21 +726,61 @@ async def _process_pulled_events( successor_event_ids, ) if successor_event_ids: + for successor_event_id in successor_event_ids: + # Don't add itself back as a successor + if successor_event_id != insertion_event_id: + # Fake edge to point the successor back + # at the insertion event + event_id_graph.setdefault( + successor_event_id, [] + ).append(insertion_event_id) + + # TODO: We also need to add fake edges to connect the oldest-in-time messages + # in the batch to the event we branched off of, see https://github.com/matrix-org/synapse/pull/11114#discussion_r739300985 - event_id_graph.setdefault( - insertion_event_id, [] - ).extend( - [ - successor_event_id - for successor_event_id in successor_event_ids - # Don't add itself back as a successor - if successor_event_id != insertion_event_id - ] - ) + return event_id_graph + + async def _process_pulled_events( + self, origin: str, events: Iterable[EventBase], backfilled: bool + ) -> None: + """Process a batch of events we have pulled from a remote server + + Pulls in any events required to auth the events, persists the received events, + and notifies clients, if appropriate. + + Assumes the events have already had their signatures and hashes checked. + + Params: + origin: The server we received these events from + events: The received events. + backfilled: True if this is part of a historical batch of events (inhibits + notification to clients, and validation of device keys.) + """ + + logger.info( + "backfill events=%s", + [ + "event_id=%s,depth=%d,body=%s,prevs=%s\n" + % ( + event.event_id, + event.depth, + event.content.get("body", event.type), + event.prev_event_ids(), + ) + for event in events + ], + ) + + # We want to sort these by depth so we process them and + # tell clients about them in order. 
+ # sorted_events = sorted(events, key=lambda x: x.depth) # We want to sort topologically so we process them and tell clients # about them in order. sorted_events = [] + event_ids = [event.event_id for event in events] + event_map = {event.event_id: event for event in events} + event_id_graph = await self.generateEventIdGraphFromEvents(events) for event_id in sorted_topologically(event_ids, event_id_graph): sorted_events.append(event_map[event_id]) sorted_events = reversed(sorted_events) From c772b35a257afaeb989a0c9b2690e2a1b62520ee Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 29 Oct 2021 15:34:38 -0500 Subject: [PATCH 23/47] Implement backfill in handler so we can do fetching later Old and new implementation seem to perform the same :) ``` old implementation backfill events=[ "event_id=$BU66ZRqOvMEH-wRgx3hruEBKH1tcUUD0rWdGnUEAcuQ,depth=11,body=Message 1 (eventIDsAfter),prevs=['$r7UcvfwqKAnhqAesYovjSEWaC4aOcXWJMWQ0W7LhBok']", "event_id=$r7UcvfwqKAnhqAesYovjSEWaC4aOcXWJMWQ0W7LhBok,depth=10,body=Message 0 (eventIDsAfter),prevs=['$9JuhSj0h0AbHvky0T3tSRfq_lxg1k2LTHY-J-MW6FfA']", "event_id=$jX7MRv7WeXx79_r44I45EC8peZ0eKgbN12wolz0_ueA,depth=10,body=org.matrix.msc2716.insertion,prevs=['$9JuhSj0h0AbHvky0T3tSRfq_lxg1k2LTHY-J-MW6FfA']", "event_id=$HG6XjFmFFO5O2-JW-kaayz2qJTwhzrtICjHdV9j0RMU,depth=10,body=org.matrix.msc2716.batch,prevs=['$d-z0eDSnU4PhbnisRLC7OIvoQFXlfnwhElh5mA1gFFo']", "event_id=$d-z0eDSnU4PhbnisRLC7OIvoQFXlfnwhElh5mA1gFFo,depth=10,body=Historical 99 (batch=0),prevs=['$z8iHeSZupiT5NOYzhhyOEFwrPMFI3vN2iDFpijniw6Q']", "event_id=$z8iHeSZupiT5NOYzhhyOEFwrPMFI3vN2iDFpijniw6Q,depth=10,body=Historical 98 (batch=0),prevs=['$ddn3uR4QbOH8imNzXy-_xz9Kn63qAJhC9NjFOUvu-_w']", "event_id=$ddn3uR4QbOH8imNzXy-_xz9Kn63qAJhC9NjFOUvu-_w,depth=10,body=Historical 97 (batch=0),prevs=['$BRTaYbuvXTnBpC-Dhee2UUBY-LePaWLTMd60sa82c_I']", "event_id=$BRTaYbuvXTnBpC-Dhee2UUBY-LePaWLTMd60sa82c_I,depth=10,body=Historical 96 
(batch=0),prevs=['$HC1catch1Ti3NWcGWWXJ7doMinWfU-SNkCYJC7v_RxA']", "event_id=$HC1catch1Ti3NWcGWWXJ7doMinWfU-SNkCYJC7v_RxA,depth=10,body=Historical 95 (batch=0),prevs=['$DL3D99BYS_Cn047L6XxUvHN86icB69Yi9jttmxnepVo']", "event_id=$DL3D99BYS_Cn047L6XxUvHN86icB69Yi9jttmxnepVo,depth=10,body=Historical 94 (batch=0),prevs=['$bqDvvvMj2a4x4FUh_Iy6aA9sejqunlPQxjgDhM9-2CY']", "event_id=$bqDvvvMj2a4x4FUh_Iy6aA9sejqunlPQxjgDhM9-2CY,depth=10,body=Historical 93 (batch=0),prevs=['$CUhTzLqLabAzjqNbh9_SKSX1Re7z581D7BF90a3jE-0']", "event_id=$CUhTzLqLabAzjqNbh9_SKSX1Re7z581D7BF90a3jE-0,depth=10,body=Historical 92 (batch=0),prevs=['$3l6A4siERlLreAmXGH1KFagDjb9G8i4_TrxLErZS4FY']", "event_id=$3l6A4siERlLreAmXGH1KFagDjb9G8i4_TrxLErZS4FY,depth=10,body=Historical 91 (batch=0),prevs=['$BSosTk20sS031k33sDi_kMkrGMVjSUnCzT1wvZo0dcg']", "event_id=$BSosTk20sS031k33sDi_kMkrGMVjSUnCzT1wvZo0dcg,depth=10,body=Historical 90 (batch=0),prevs=['$XoyyYG9ngSu_sJ_npwDgrwLVQt0E6kDAxW_f0MzaIKs']", "event_id=$XoyyYG9ngSu_sJ_npwDgrwLVQt0E6kDAxW_f0MzaIKs,depth=10,body=Historical 89 (batch=0),prevs=['$8j7g564Kma7kqrgkqru2Nw21Ez2xZoyqBXtgrwYtbIU']", "event_id=$8j7g564Kma7kqrgkqru2Nw21Ez2xZoyqBXtgrwYtbIU,depth=10,body=Historical 88 (batch=0),prevs=['$FlIO19qRDZggQ3ZIMd4OhF4kGNpiipB4BR7sqQnG5xU']", "event_id=$FlIO19qRDZggQ3ZIMd4OhF4kGNpiipB4BR7sqQnG5xU,depth=10,body=Historical 87 (batch=0),prevs=['$reiGLF3R8Q5wqm_KJKOTQGFQ6PHIExRrV1nmB7lw5WA']", "event_id=$reiGLF3R8Q5wqm_KJKOTQGFQ6PHIExRrV1nmB7lw5WA,depth=10,body=Historical 86 (batch=0),prevs=['$7JRXCFtNfXY_TNZTZSjr7dPnKAMz39E8K2DQfhrxk0s']", "event_id=$7JRXCFtNfXY_TNZTZSjr7dPnKAMz39E8K2DQfhrxk0s,depth=10,body=Historical 85 (batch=0),prevs=['$8Z6-6uE22VHh3KoN2wGa9SAPhf0_MXPz3y_O2G8A9Bo']", "event_id=$8Z6-6uE22VHh3KoN2wGa9SAPhf0_MXPz3y_O2G8A9Bo,depth=10,body=Historical 84 (batch=0),prevs=['$CETAr2-SIsfT_exUgUY9El4PL6WOFnr7_6Hv86BuZUQ']", "event_id=$CETAr2-SIsfT_exUgUY9El4PL6WOFnr7_6Hv86BuZUQ,depth=10,body=Historical 83 
(batch=0),prevs=['$dMOfVXcWxuoNS9di9AHyocIzAAucNG2v2aXqZWK7oA0']", "event_id=$dMOfVXcWxuoNS9di9AHyocIzAAucNG2v2aXqZWK7oA0,depth=10,body=Historical 82 (batch=0),prevs=['$4UjX4-4EejV7e0umXczxoqx9eHMvu2fFDIPGlY4zdrQ']", "event_id=$4UjX4-4EejV7e0umXczxoqx9eHMvu2fFDIPGlY4zdrQ,depth=10,body=Historical 81 (batch=0),prevs=['$e5d87r41P1lFrfM8iEp3waCi21lKx-bFBTEMhUMDJhE']", "event_id=$e5d87r41P1lFrfM8iEp3waCi21lKx-bFBTEMhUMDJhE,depth=10,body=Historical 80 (batch=0),prevs=['$ez0-Q4agOvUQbWUELiTJEEJWwZYj-TnaQTnNc3Vil8M']", "event_id=$ez0-Q4agOvUQbWUELiTJEEJWwZYj-TnaQTnNc3Vil8M,depth=10,body=Historical 79 (batch=0),prevs=['$zaMM-ZtQMADqFNsjrQrl_u0FIjxVHSEbdICkI5kph14']", "event_id=$zaMM-ZtQMADqFNsjrQrl_u0FIjxVHSEbdICkI5kph14,depth=10,body=Historical 78 (batch=0),prevs=['$jhDwZ4C8aCS7xm0D7xwQYKYXxE9yrnV_jnXWs7yj97A']", "event_id=$jhDwZ4C8aCS7xm0D7xwQYKYXxE9yrnV_jnXWs7yj97A,depth=10,body=Historical 77 (batch=0),prevs=['$794zi7lECGaZqgi-mVDTuAcCVxQRcasLEWHNQk_T5Qo']", "event_id=$794zi7lECGaZqgi-mVDTuAcCVxQRcasLEWHNQk_T5Qo,depth=10,body=Historical 76 (batch=0),prevs=['$RYc7Epu2beVamNOISGEPPzYPnqNr8OPpy6srKUDCNaY']", "event_id=$RYc7Epu2beVamNOISGEPPzYPnqNr8OPpy6srKUDCNaY,depth=10,body=Historical 75 (batch=0),prevs=['$NxzF542rsM0WMKZonV9YxmaJ5PLpVWK1TUfcLFP4VT8']", "event_id=$NxzF542rsM0WMKZonV9YxmaJ5PLpVWK1TUfcLFP4VT8,depth=10,body=Historical 74 (batch=0),prevs=['$YbcfMO7DOlq6owFILKIGpLSrvpMiDmIp77TCmafFxUU']", "event_id=$YbcfMO7DOlq6owFILKIGpLSrvpMiDmIp77TCmafFxUU,depth=10,body=Historical 73 (batch=0),prevs=['$RmlFFGojIl8VSc115lgQwk0NeAIAQT7CuLv3ajIv2X0']", "event_id=$RmlFFGojIl8VSc115lgQwk0NeAIAQT7CuLv3ajIv2X0,depth=10,body=Historical 72 (batch=0),prevs=['$lGGzpQOPdmzBqXIFF5HZfyGQCmXJOnmkZCRFRzxqJkw']", "event_id=$lGGzpQOPdmzBqXIFF5HZfyGQCmXJOnmkZCRFRzxqJkw,depth=10,body=Historical 71 (batch=0),prevs=['$TcSzi68MSFf1W0843Zi0iRq5vXFs8FhdtI9NfNCY5DM']", "event_id=$TcSzi68MSFf1W0843Zi0iRq5vXFs8FhdtI9NfNCY5DM,depth=10,body=Historical 70 
(batch=0),prevs=['$iKaFgZ_aeN-ZJUNJEYj4WjY31RPm8PXs27n-u2LnlCE']", "event_id=$iKaFgZ_aeN-ZJUNJEYj4WjY31RPm8PXs27n-u2LnlCE,depth=10,body=Historical 69 (batch=0),prevs=['$n52R-hna5danKau87AHkwXDV5CuUTIZ960gtHr9X8hw']", "event_id=$n52R-hna5danKau87AHkwXDV5CuUTIZ960gtHr9X8hw,depth=10,body=Historical 68 (batch=0),prevs=['$Fr3wnWXII8LpE3VyF1pLWuLpSCyxu7jC1i4ggMqH1kE']", "event_id=$Fr3wnWXII8LpE3VyF1pLWuLpSCyxu7jC1i4ggMqH1kE,depth=10,body=Historical 67 (batch=0),prevs=['$ebCIN2TL4fFQeTEahzeX-VCkh_YEohAItvRd6LAVP2E']", "event_id=$ebCIN2TL4fFQeTEahzeX-VCkh_YEohAItvRd6LAVP2E,depth=10,body=Historical 66 (batch=0),prevs=['$CFeaKxHxgPCSUufJChertFT4au4P9Y1a_tTdN2wmo3Q']", "event_id=$CFeaKxHxgPCSUufJChertFT4au4P9Y1a_tTdN2wmo3Q,depth=10,body=Historical 65 (batch=0),prevs=['$MctpfuMG5AhhCb9aPRG-4V6VKxxNUzALNLq3BVJKejU']", "event_id=$MctpfuMG5AhhCb9aPRG-4V6VKxxNUzALNLq3BVJKejU,depth=10,body=Historical 64 (batch=0),prevs=['$ikjtTCZ7KXg1qtwgURuBZ-P5C1RG1oY8U3k2rm9RGxA']", "event_id=$ikjtTCZ7KXg1qtwgURuBZ-P5C1RG1oY8U3k2rm9RGxA,depth=10,body=Historical 63 (batch=0),prevs=['$KDbI-rBuoAI7rRhtAjcoDY2PbLQ9gDsK5fRts3OihSA']", "event_id=$KDbI-rBuoAI7rRhtAjcoDY2PbLQ9gDsK5fRts3OihSA,depth=10,body=Historical 62 (batch=0),prevs=['$_ZwqRJxNwgcZn4Trb5nrTjRP8ssQuLDq5ThguCm9KzE']", "event_id=$_ZwqRJxNwgcZn4Trb5nrTjRP8ssQuLDq5ThguCm9KzE,depth=10,body=Historical 61 (batch=0),prevs=['$hq1xDHEWodibkwlNg73931wjYlD5M29dkFHU0dQB2dk']", "event_id=$hq1xDHEWodibkwlNg73931wjYlD5M29dkFHU0dQB2dk,depth=10,body=Historical 60 (batch=0),prevs=['$XPRt_CuI_AXJylRQYbQ0ItkRkMy4ey0XxqS7iIdGLfQ']", "event_id=$XPRt_CuI_AXJylRQYbQ0ItkRkMy4ey0XxqS7iIdGLfQ,depth=10,body=Historical 59 (batch=0),prevs=['$Ucd44nYFkYVe_zs5wEqU5slG74IIPdzq_mqEHinIoIo']", "event_id=$Ucd44nYFkYVe_zs5wEqU5slG74IIPdzq_mqEHinIoIo,depth=10,body=Historical 58 (batch=0),prevs=['$CtM1TqWoFt3KabPNRKYi1vrpOPGwdpjx-15TpfKgYYA']", "event_id=$CtM1TqWoFt3KabPNRKYi1vrpOPGwdpjx-15TpfKgYYA,depth=10,body=Historical 57 
(batch=0),prevs=['$Jo8T-m9nSs77uF4uFtkd22CifwnvDlr13Fs14NHcCMw']", "event_id=$Jo8T-m9nSs77uF4uFtkd22CifwnvDlr13Fs14NHcCMw,depth=10,body=Historical 56 (batch=0),prevs=['$WW45KevKriYRET0tOmKk8Ha9fFqxJJS_IC8Ih6I3L2A']", "event_id=$WW45KevKriYRET0tOmKk8Ha9fFqxJJS_IC8Ih6I3L2A,depth=10,body=Historical 55 (batch=0),prevs=['$G60oGyHZvp-Gim48qnC_epu2JRseKQUCTI5vqZEIbaI']", "event_id=$G60oGyHZvp-Gim48qnC_epu2JRseKQUCTI5vqZEIbaI,depth=10,body=Historical 54 (batch=0),prevs=['$R9Z6YaR1K9ulUzF9d6-ZL0QwJmwaEj1wTUPHrTJqFUs']", "event_id=$R9Z6YaR1K9ulUzF9d6-ZL0QwJmwaEj1wTUPHrTJqFUs,depth=10,body=Historical 53 (batch=0),prevs=['$CqedFlSaUBYWSHtkJJYmoc-DnxWCAaKVKOai0u5rawM']", "event_id=$CqedFlSaUBYWSHtkJJYmoc-DnxWCAaKVKOai0u5rawM,depth=10,body=Historical 52 (batch=0),prevs=['$3Ol33IgZ5lBuVwgELhRBYoFLb6GwkFTha3j6uxvHToY']", "event_id=$3Ol33IgZ5lBuVwgELhRBYoFLb6GwkFTha3j6uxvHToY,depth=10,body=Historical 51 (batch=0),prevs=['$wUcLTd2pGhliQunqWxFGvT6yiEZrUXA920-hcjaloSw']", "event_id=$wUcLTd2pGhliQunqWxFGvT6yiEZrUXA920-hcjaloSw,depth=10,body=Historical 50 (batch=0),prevs=['$gaO95ghwbwpGttuqBbhMrZJzz9Z440U7L2WOtDy7Hr8']", "event_id=$gaO95ghwbwpGttuqBbhMrZJzz9Z440U7L2WOtDy7Hr8,depth=10,body=Historical 49 (batch=0),prevs=['$4TSr_VvZzCV17JYxAob3DN2nQiYDqgW2S_QkW27v7Bg']", "event_id=$4TSr_VvZzCV17JYxAob3DN2nQiYDqgW2S_QkW27v7Bg,depth=10,body=Historical 48 (batch=0),prevs=['$bJaLResm_4aAwSjZ0POTvtVx4lnV9HKt8HwiYgP21lM']", "event_id=$bJaLResm_4aAwSjZ0POTvtVx4lnV9HKt8HwiYgP21lM,depth=10,body=Historical 47 (batch=0),prevs=['$BouN8LDmbFWiWo9Wc4wQi48YsC-icXewa05KR7byzhM']", "event_id=$BouN8LDmbFWiWo9Wc4wQi48YsC-icXewa05KR7byzhM,depth=10,body=Historical 46 (batch=0),prevs=['$n9Cv6EUu7i25OZLCllOE2PU4xjRthDKhkdtp5OtMuVk']", "event_id=$n9Cv6EUu7i25OZLCllOE2PU4xjRthDKhkdtp5OtMuVk,depth=10,body=Historical 45 (batch=0),prevs=['$MLVpmxVSSRRpxlcTkwIJT3neo7maqOp_ZLvwXRBY8A8']", "event_id=$MLVpmxVSSRRpxlcTkwIJT3neo7maqOp_ZLvwXRBY8A8,depth=10,body=Historical 44 
(batch=0),prevs=['$tNv8Se5-lgfW-9MqE_upeBzq3QSPpsn7qOHz49WFs_c']", "event_id=$tNv8Se5-lgfW-9MqE_upeBzq3QSPpsn7qOHz49WFs_c,depth=10,body=Historical 43 (batch=0),prevs=['$XCDKybStk9PsquzrY-biUlKOmjk3wTXnsD1htiMdgAM']", "event_id=$XCDKybStk9PsquzrY-biUlKOmjk3wTXnsD1htiMdgAM,depth=10,body=Historical 42 (batch=0),prevs=['$iUDAE7nT6DQaNJSemwUpqQs8KvKTPFexcBJLTs2-gnY']", "event_id=$iUDAE7nT6DQaNJSemwUpqQs8KvKTPFexcBJLTs2-gnY,depth=10,body=Historical 41 (batch=0),prevs=['$T8NTrB9vQMLW-a0tT3corPl0g6O3UjGIoaHBjkjCD_w']", "event_id=$T8NTrB9vQMLW-a0tT3corPl0g6O3UjGIoaHBjkjCD_w,depth=10,body=Historical 40 (batch=0),prevs=['$N7jCLhKYDejMTejtrjJfM2lMRES5OPeowtvN-L4utsE']", "event_id=$N7jCLhKYDejMTejtrjJfM2lMRES5OPeowtvN-L4utsE,depth=10,body=Historical 39 (batch=0),prevs=['$Oy9aTfjL0LSnbF_6MRq7-5baaEbfxyRTK1kbVGeNoaU']", "event_id=$Oy9aTfjL0LSnbF_6MRq7-5baaEbfxyRTK1kbVGeNoaU,depth=10,body=Historical 38 (batch=0),prevs=['$V4qFDnqO1LrboSWwCoQpQ4dXLyrc74qZbbsaHxqAj4Q']", "event_id=$V4qFDnqO1LrboSWwCoQpQ4dXLyrc74qZbbsaHxqAj4Q,depth=10,body=Historical 37 (batch=0),prevs=['$vWdLNDGwj_mBJ6JZ2s01OhsxRuPNSSJAM_bMa6J9Q2c']", "event_id=$vWdLNDGwj_mBJ6JZ2s01OhsxRuPNSSJAM_bMa6J9Q2c,depth=10,body=Historical 36 (batch=0),prevs=['$JZt2EdPtKJX6f2MMbj_gSEseVAMa-H5r_J_yvfhHs5k']", "event_id=$JZt2EdPtKJX6f2MMbj_gSEseVAMa-H5r_J_yvfhHs5k,depth=10,body=Historical 35 (batch=0),prevs=['$NPzUfUgNfvkezhUeLmcp2utVA_XnW8YzDqbZlznsEN0']", "event_id=$NPzUfUgNfvkezhUeLmcp2utVA_XnW8YzDqbZlznsEN0,depth=10,body=Historical 34 (batch=0),prevs=['$NNUjzVtvOSFDYRID_PsMJrUC8gx5p1mPvIoMB7ibDn0']", "event_id=$NNUjzVtvOSFDYRID_PsMJrUC8gx5p1mPvIoMB7ibDn0,depth=10,body=Historical 33 (batch=0),prevs=['$dONbCTAYR0kOMVSmxOnM5aQ4bgx9PbKza98_e2Lxxw4']", "event_id=$dONbCTAYR0kOMVSmxOnM5aQ4bgx9PbKza98_e2Lxxw4,depth=10,body=Historical 32 (batch=0),prevs=['$dwxZHePgzbtQMO3caex0O9Zdl-gT3fpWRZD5dJ43JmE']", "event_id=$dwxZHePgzbtQMO3caex0O9Zdl-gT3fpWRZD5dJ43JmE,depth=10,body=Historical 31 
(batch=0),prevs=['$5JOpa9222Y0__S8yfaZBT7ESuRaPeeRz80jleMoa5hE']", "event_id=$5JOpa9222Y0__S8yfaZBT7ESuRaPeeRz80jleMoa5hE,depth=10,body=Historical 30 (batch=0),prevs=['$7yIEFZZFs91wjKScsjbG854DUOqomPnV4iGG-EDWK6w']", "event_id=$7yIEFZZFs91wjKScsjbG854DUOqomPnV4iGG-EDWK6w,depth=10,body=Historical 29 (batch=0),prevs=['$9pzIrDRB1cuweDiHaPGB6AtTtGeGEOmqyoP59UAa0pI']", "event_id=$9pzIrDRB1cuweDiHaPGB6AtTtGeGEOmqyoP59UAa0pI,depth=10,body=Historical 28 (batch=0),prevs=['$EBu0Y6-OdqNUumOlHK__K2eXsN8mAYV21eCRKwGmkxU']", "event_id=$EBu0Y6-OdqNUumOlHK__K2eXsN8mAYV21eCRKwGmkxU,depth=10,body=Historical 27 (batch=0),prevs=['$95FzYeNZk31wyfaCojognGOAkzLhHLrv4_j3eoJawwU']", "event_id=$95FzYeNZk31wyfaCojognGOAkzLhHLrv4_j3eoJawwU,depth=10,body=Historical 26 (batch=0),prevs=['$QDGDnSNRXjNZG2lia2gjeD1aEfgfZ0gxI1BzIgjyZq8']", "event_id=$QDGDnSNRXjNZG2lia2gjeD1aEfgfZ0gxI1BzIgjyZq8,depth=10,body=Historical 25 (batch=0),prevs=['$tf1GB8ydPyNTNAaGwcTUXaTOSCmNohK6e6MmOnFwrJA']", "event_id=$tf1GB8ydPyNTNAaGwcTUXaTOSCmNohK6e6MmOnFwrJA,depth=10,body=Historical 24 (batch=0),prevs=['$n40ks3dofH6DioQlfe84n3exnYrboy8GHHYJ_qBFJMg']", "event_id=$n40ks3dofH6DioQlfe84n3exnYrboy8GHHYJ_qBFJMg,depth=10,body=Historical 23 (batch=0),prevs=['$xl8xG3MJiiNP21WNFmDLxOR8Rhy_FLws0snsmDUoAYQ']", "event_id=$xl8xG3MJiiNP21WNFmDLxOR8Rhy_FLws0snsmDUoAYQ,depth=10,body=Historical 22 (batch=0),prevs=['$6MxFONk8t5shoQpjjZyxd0ypmfj76LPs9hWKkPAxMcI']", "event_id=$6MxFONk8t5shoQpjjZyxd0ypmfj76LPs9hWKkPAxMcI,depth=10,body=Historical 21 (batch=0),prevs=['$XAAn4gMQe4ShIZKvZntEUBRljDM4IxHDVeNOXp3K2Bk']", "event_id=$XAAn4gMQe4ShIZKvZntEUBRljDM4IxHDVeNOXp3K2Bk,depth=10,body=Historical 20 (batch=0),prevs=['$Gj3RlKLUMb6hI_JyXdfwuoneWDs1b63qF7WdYYiNDG0']", "event_id=$Gj3RlKLUMb6hI_JyXdfwuoneWDs1b63qF7WdYYiNDG0,depth=10,body=Historical 19 (batch=0),prevs=['$yiRe3l3_APWuZt1wfFBxXQp2orQJZJvwv3yk4QNiPxo']", "event_id=$yiRe3l3_APWuZt1wfFBxXQp2orQJZJvwv3yk4QNiPxo,depth=10,body=Historical 18 
(batch=0),prevs=['$v04Ud2zPqFe5M3jPbiLrqQ6r_QXgFlmRTMMLyVvAsN8']", "event_id=$v04Ud2zPqFe5M3jPbiLrqQ6r_QXgFlmRTMMLyVvAsN8,depth=10,body=Historical 17 (batch=0),prevs=['$tO5NiLH2mPBIMRVmrTIza6CUv7v6R0e_tgj0ZKEtpG0']", "event_id=$tO5NiLH2mPBIMRVmrTIza6CUv7v6R0e_tgj0ZKEtpG0,depth=10,body=Historical 16 (batch=0),prevs=['$fyQ-akREpz3vpQNqTZ47yxAs_APAw2L_bQ1iAudsPWk']", "event_id=$fyQ-akREpz3vpQNqTZ47yxAs_APAw2L_bQ1iAudsPWk,depth=10,body=Historical 15 (batch=0),prevs=['$qddW2HRZA0BudaIWuun6Odt6wDbz7ETRJmSY8wtjMnY']", "event_id=$qddW2HRZA0BudaIWuun6Odt6wDbz7ETRJmSY8wtjMnY,depth=10,body=Historical 14 (batch=0),prevs=['$fxl98r0B-sa2T8LDR2q6Wi01Zw6xCPZB5FcMf5x_vxI']", "event_id=$fxl98r0B-sa2T8LDR2q6Wi01Zw6xCPZB5FcMf5x_vxI,depth=10,body=Historical 13 (batch=0),prevs=['$1PML_QTo1MK_EuUz3OYyc2vDjfGKKYt-0sUSN9jTeHs']", "event_id=$1PML_QTo1MK_EuUz3OYyc2vDjfGKKYt-0sUSN9jTeHs,depth=10,body=Historical 12 (batch=0),prevs=['$YTz-q29BhUU_Ox7_CR7gao1v7mmMqLwbz51WWI0CjHs']", "event_id=$YTz-q29BhUU_Ox7_CR7gao1v7mmMqLwbz51WWI0CjHs,depth=10,body=Historical 11 (batch=0),prevs=['$Y3HUk4SiCWUrV5V_ZUcwC1IkVZEPM3V8oGWrWKRcv3I']", "event_id=$Y3HUk4SiCWUrV5V_ZUcwC1IkVZEPM3V8oGWrWKRcv3I,depth=10,body=Historical 10 (batch=0),prevs=['$uV5daep6tIdbImy2XEIxdQIuGYc547ulzxMplDyBWfY']", "event_id=$uV5daep6tIdbImy2XEIxdQIuGYc547ulzxMplDyBWfY,depth=10,body=Historical 9 (batch=0),prevs=['$bVKZ6uRROzC7aNcZc9cUsfhHl5dU_-Vm6Qz5V9X8GsI']", "event_id=$bVKZ6uRROzC7aNcZc9cUsfhHl5dU_-Vm6Qz5V9X8GsI,depth=10,body=Historical 8 (batch=0),prevs=['$6zjql7LGgIa3gFpf9owZsKWeFU4FoMUKDtVPpMo7xsM']", "event_id=$6zjql7LGgIa3gFpf9owZsKWeFU4FoMUKDtVPpMo7xsM,depth=10,body=Historical 7 (batch=0),prevs=['$w5t7UuOHrrsfZRebkX4adeEeJpPPGv9y6RyhKpxToMc']", "event_id=$w5t7UuOHrrsfZRebkX4adeEeJpPPGv9y6RyhKpxToMc,depth=10,body=Historical 6 (batch=0),prevs=['$649goNxH4B0DMta75_xs_TTib6C268A2JiUHvpUMu8g']", "event_id=$649goNxH4B0DMta75_xs_TTib6C268A2JiUHvpUMu8g,depth=10,body=Historical 5 
(batch=0),prevs=['$mtUwpP0KSNwsn73yxBfQ7imKUe955v_SvI427PC5y2I']", "event_id=$9JuhSj0h0AbHvky0T3tSRfq_lxg1k2LTHY-J-MW6FfA,depth=9,body=Message 1 (eventIDsBefore),prevs=['$FcPmU5Fjg57eO_2J4VOK9jOcqRclEED5B9i1FsHNjaE']\" ] ``` ``` new implementation backfill events=[ "event_id=$BU66ZRqOvMEH-wRgx3hruEBKH1tcUUD0rWdGnUEAcuQ,depth=11,body=Message 1 (eventIDsAfter),prevs=['$r7UcvfwqKAnhqAesYovjSEWaC4aOcXWJMWQ0W7LhBok']", "event_id=$r7UcvfwqKAnhqAesYovjSEWaC4aOcXWJMWQ0W7LhBok,depth=10,body=Message 0 (eventIDsAfter),prevs=['$9JuhSj0h0AbHvky0T3tSRfq_lxg1k2LTHY-J-MW6FfA']", "event_id=$jX7MRv7WeXx79_r44I45EC8peZ0eKgbN12wolz0_ueA,depth=10,body=org.matrix.msc2716.insertion,prevs=['$9JuhSj0h0AbHvky0T3tSRfq_lxg1k2LTHY-J-MW6FfA']", "event_id=$HG6XjFmFFO5O2-JW-kaayz2qJTwhzrtICjHdV9j0RMU,depth=10,body=org.matrix.msc2716.batch,prevs=['$d-z0eDSnU4PhbnisRLC7OIvoQFXlfnwhElh5mA1gFFo']", "event_id=$d-z0eDSnU4PhbnisRLC7OIvoQFXlfnwhElh5mA1gFFo,depth=10,body=Historical 99 (batch=0),prevs=['$z8iHeSZupiT5NOYzhhyOEFwrPMFI3vN2iDFpijniw6Q']", "event_id=$z8iHeSZupiT5NOYzhhyOEFwrPMFI3vN2iDFpijniw6Q,depth=10,body=Historical 98 (batch=0),prevs=['$ddn3uR4QbOH8imNzXy-_xz9Kn63qAJhC9NjFOUvu-_w']", "event_id=$ddn3uR4QbOH8imNzXy-_xz9Kn63qAJhC9NjFOUvu-_w,depth=10,body=Historical 97 (batch=0),prevs=['$BRTaYbuvXTnBpC-Dhee2UUBY-LePaWLTMd60sa82c_I']", "event_id=$BRTaYbuvXTnBpC-Dhee2UUBY-LePaWLTMd60sa82c_I,depth=10,body=Historical 96 (batch=0),prevs=['$HC1catch1Ti3NWcGWWXJ7doMinWfU-SNkCYJC7v_RxA']", "event_id=$HC1catch1Ti3NWcGWWXJ7doMinWfU-SNkCYJC7v_RxA,depth=10,body=Historical 95 (batch=0),prevs=['$DL3D99BYS_Cn047L6XxUvHN86icB69Yi9jttmxnepVo']", "event_id=$DL3D99BYS_Cn047L6XxUvHN86icB69Yi9jttmxnepVo,depth=10,body=Historical 94 (batch=0),prevs=['$bqDvvvMj2a4x4FUh_Iy6aA9sejqunlPQxjgDhM9-2CY']", "event_id=$bqDvvvMj2a4x4FUh_Iy6aA9sejqunlPQxjgDhM9-2CY,depth=10,body=Historical 93 (batch=0),prevs=['$CUhTzLqLabAzjqNbh9_SKSX1Re7z581D7BF90a3jE-0']", 
"event_id=$CUhTzLqLabAzjqNbh9_SKSX1Re7z581D7BF90a3jE-0,depth=10,body=Historical 92 (batch=0),prevs=['$3l6A4siERlLreAmXGH1KFagDjb9G8i4_TrxLErZS4FY']", "event_id=$3l6A4siERlLreAmXGH1KFagDjb9G8i4_TrxLErZS4FY,depth=10,body=Historical 91 (batch=0),prevs=['$BSosTk20sS031k33sDi_kMkrGMVjSUnCzT1wvZo0dcg']", "event_id=$BSosTk20sS031k33sDi_kMkrGMVjSUnCzT1wvZo0dcg,depth=10,body=Historical 90 (batch=0),prevs=['$XoyyYG9ngSu_sJ_npwDgrwLVQt0E6kDAxW_f0MzaIKs']", "event_id=$XoyyYG9ngSu_sJ_npwDgrwLVQt0E6kDAxW_f0MzaIKs,depth=10,body=Historical 89 (batch=0),prevs=['$8j7g564Kma7kqrgkqru2Nw21Ez2xZoyqBXtgrwYtbIU']", "event_id=$8j7g564Kma7kqrgkqru2Nw21Ez2xZoyqBXtgrwYtbIU,depth=10,body=Historical 88 (batch=0),prevs=['$FlIO19qRDZggQ3ZIMd4OhF4kGNpiipB4BR7sqQnG5xU']", "event_id=$FlIO19qRDZggQ3ZIMd4OhF4kGNpiipB4BR7sqQnG5xU,depth=10,body=Historical 87 (batch=0),prevs=['$reiGLF3R8Q5wqm_KJKOTQGFQ6PHIExRrV1nmB7lw5WA']", "event_id=$reiGLF3R8Q5wqm_KJKOTQGFQ6PHIExRrV1nmB7lw5WA,depth=10,body=Historical 86 (batch=0),prevs=['$7JRXCFtNfXY_TNZTZSjr7dPnKAMz39E8K2DQfhrxk0s']", "event_id=$7JRXCFtNfXY_TNZTZSjr7dPnKAMz39E8K2DQfhrxk0s,depth=10,body=Historical 85 (batch=0),prevs=['$8Z6-6uE22VHh3KoN2wGa9SAPhf0_MXPz3y_O2G8A9Bo']", "event_id=$8Z6-6uE22VHh3KoN2wGa9SAPhf0_MXPz3y_O2G8A9Bo,depth=10,body=Historical 84 (batch=0),prevs=['$CETAr2-SIsfT_exUgUY9El4PL6WOFnr7_6Hv86BuZUQ']", "event_id=$CETAr2-SIsfT_exUgUY9El4PL6WOFnr7_6Hv86BuZUQ,depth=10,body=Historical 83 (batch=0),prevs=['$dMOfVXcWxuoNS9di9AHyocIzAAucNG2v2aXqZWK7oA0']", "event_id=$dMOfVXcWxuoNS9di9AHyocIzAAucNG2v2aXqZWK7oA0,depth=10,body=Historical 82 (batch=0),prevs=['$4UjX4-4EejV7e0umXczxoqx9eHMvu2fFDIPGlY4zdrQ']", "event_id=$4UjX4-4EejV7e0umXczxoqx9eHMvu2fFDIPGlY4zdrQ,depth=10,body=Historical 81 (batch=0),prevs=['$e5d87r41P1lFrfM8iEp3waCi21lKx-bFBTEMhUMDJhE']", "event_id=$e5d87r41P1lFrfM8iEp3waCi21lKx-bFBTEMhUMDJhE,depth=10,body=Historical 80 (batch=0),prevs=['$ez0-Q4agOvUQbWUELiTJEEJWwZYj-TnaQTnNc3Vil8M']", 
"event_id=$ez0-Q4agOvUQbWUELiTJEEJWwZYj-TnaQTnNc3Vil8M,depth=10,body=Historical 79 (batch=0),prevs=['$zaMM-ZtQMADqFNsjrQrl_u0FIjxVHSEbdICkI5kph14']", "event_id=$zaMM-ZtQMADqFNsjrQrl_u0FIjxVHSEbdICkI5kph14,depth=10,body=Historical 78 (batch=0),prevs=['$jhDwZ4C8aCS7xm0D7xwQYKYXxE9yrnV_jnXWs7yj97A']", "event_id=$jhDwZ4C8aCS7xm0D7xwQYKYXxE9yrnV_jnXWs7yj97A,depth=10,body=Historical 77 (batch=0),prevs=['$794zi7lECGaZqgi-mVDTuAcCVxQRcasLEWHNQk_T5Qo']", "event_id=$794zi7lECGaZqgi-mVDTuAcCVxQRcasLEWHNQk_T5Qo,depth=10,body=Historical 76 (batch=0),prevs=['$RYc7Epu2beVamNOISGEPPzYPnqNr8OPpy6srKUDCNaY']", "event_id=$RYc7Epu2beVamNOISGEPPzYPnqNr8OPpy6srKUDCNaY,depth=10,body=Historical 75 (batch=0),prevs=['$NxzF542rsM0WMKZonV9YxmaJ5PLpVWK1TUfcLFP4VT8']", "event_id=$NxzF542rsM0WMKZonV9YxmaJ5PLpVWK1TUfcLFP4VT8,depth=10,body=Historical 74 (batch=0),prevs=['$YbcfMO7DOlq6owFILKIGpLSrvpMiDmIp77TCmafFxUU']", "event_id=$YbcfMO7DOlq6owFILKIGpLSrvpMiDmIp77TCmafFxUU,depth=10,body=Historical 73 (batch=0),prevs=['$RmlFFGojIl8VSc115lgQwk0NeAIAQT7CuLv3ajIv2X0']", "event_id=$RmlFFGojIl8VSc115lgQwk0NeAIAQT7CuLv3ajIv2X0,depth=10,body=Historical 72 (batch=0),prevs=['$lGGzpQOPdmzBqXIFF5HZfyGQCmXJOnmkZCRFRzxqJkw']", "event_id=$lGGzpQOPdmzBqXIFF5HZfyGQCmXJOnmkZCRFRzxqJkw,depth=10,body=Historical 71 (batch=0),prevs=['$TcSzi68MSFf1W0843Zi0iRq5vXFs8FhdtI9NfNCY5DM']", "event_id=$TcSzi68MSFf1W0843Zi0iRq5vXFs8FhdtI9NfNCY5DM,depth=10,body=Historical 70 (batch=0),prevs=['$iKaFgZ_aeN-ZJUNJEYj4WjY31RPm8PXs27n-u2LnlCE']", "event_id=$iKaFgZ_aeN-ZJUNJEYj4WjY31RPm8PXs27n-u2LnlCE,depth=10,body=Historical 69 (batch=0),prevs=['$n52R-hna5danKau87AHkwXDV5CuUTIZ960gtHr9X8hw']", "event_id=$n52R-hna5danKau87AHkwXDV5CuUTIZ960gtHr9X8hw,depth=10,body=Historical 68 (batch=0),prevs=['$Fr3wnWXII8LpE3VyF1pLWuLpSCyxu7jC1i4ggMqH1kE']", "event_id=$Fr3wnWXII8LpE3VyF1pLWuLpSCyxu7jC1i4ggMqH1kE,depth=10,body=Historical 67 (batch=0),prevs=['$ebCIN2TL4fFQeTEahzeX-VCkh_YEohAItvRd6LAVP2E']", 
"event_id=$ebCIN2TL4fFQeTEahzeX-VCkh_YEohAItvRd6LAVP2E,depth=10,body=Historical 66 (batch=0),prevs=['$CFeaKxHxgPCSUufJChertFT4au4P9Y1a_tTdN2wmo3Q']", "event_id=$CFeaKxHxgPCSUufJChertFT4au4P9Y1a_tTdN2wmo3Q,depth=10,body=Historical 65 (batch=0),prevs=['$MctpfuMG5AhhCb9aPRG-4V6VKxxNUzALNLq3BVJKejU']", "event_id=$MctpfuMG5AhhCb9aPRG-4V6VKxxNUzALNLq3BVJKejU,depth=10,body=Historical 64 (batch=0),prevs=['$ikjtTCZ7KXg1qtwgURuBZ-P5C1RG1oY8U3k2rm9RGxA']", "event_id=$ikjtTCZ7KXg1qtwgURuBZ-P5C1RG1oY8U3k2rm9RGxA,depth=10,body=Historical 63 (batch=0),prevs=['$KDbI-rBuoAI7rRhtAjcoDY2PbLQ9gDsK5fRts3OihSA']", "event_id=$KDbI-rBuoAI7rRhtAjcoDY2PbLQ9gDsK5fRts3OihSA,depth=10,body=Historical 62 (batch=0),prevs=['$_ZwqRJxNwgcZn4Trb5nrTjRP8ssQuLDq5ThguCm9KzE']", "event_id=$_ZwqRJxNwgcZn4Trb5nrTjRP8ssQuLDq5ThguCm9KzE,depth=10,body=Historical 61 (batch=0),prevs=['$hq1xDHEWodibkwlNg73931wjYlD5M29dkFHU0dQB2dk']", "event_id=$hq1xDHEWodibkwlNg73931wjYlD5M29dkFHU0dQB2dk,depth=10,body=Historical 60 (batch=0),prevs=['$XPRt_CuI_AXJylRQYbQ0ItkRkMy4ey0XxqS7iIdGLfQ']", "event_id=$XPRt_CuI_AXJylRQYbQ0ItkRkMy4ey0XxqS7iIdGLfQ,depth=10,body=Historical 59 (batch=0),prevs=['$Ucd44nYFkYVe_zs5wEqU5slG74IIPdzq_mqEHinIoIo']", "event_id=$Ucd44nYFkYVe_zs5wEqU5slG74IIPdzq_mqEHinIoIo,depth=10,body=Historical 58 (batch=0),prevs=['$CtM1TqWoFt3KabPNRKYi1vrpOPGwdpjx-15TpfKgYYA']", "event_id=$CtM1TqWoFt3KabPNRKYi1vrpOPGwdpjx-15TpfKgYYA,depth=10,body=Historical 57 (batch=0),prevs=['$Jo8T-m9nSs77uF4uFtkd22CifwnvDlr13Fs14NHcCMw']", "event_id=$Jo8T-m9nSs77uF4uFtkd22CifwnvDlr13Fs14NHcCMw,depth=10,body=Historical 56 (batch=0),prevs=['$WW45KevKriYRET0tOmKk8Ha9fFqxJJS_IC8Ih6I3L2A']", "event_id=$WW45KevKriYRET0tOmKk8Ha9fFqxJJS_IC8Ih6I3L2A,depth=10,body=Historical 55 (batch=0),prevs=['$G60oGyHZvp-Gim48qnC_epu2JRseKQUCTI5vqZEIbaI']", "event_id=$G60oGyHZvp-Gim48qnC_epu2JRseKQUCTI5vqZEIbaI,depth=10,body=Historical 54 (batch=0),prevs=['$R9Z6YaR1K9ulUzF9d6-ZL0QwJmwaEj1wTUPHrTJqFUs']", 
"event_id=$R9Z6YaR1K9ulUzF9d6-ZL0QwJmwaEj1wTUPHrTJqFUs,depth=10,body=Historical 53 (batch=0),prevs=['$CqedFlSaUBYWSHtkJJYmoc-DnxWCAaKVKOai0u5rawM']", "event_id=$CqedFlSaUBYWSHtkJJYmoc-DnxWCAaKVKOai0u5rawM,depth=10,body=Historical 52 (batch=0),prevs=['$3Ol33IgZ5lBuVwgELhRBYoFLb6GwkFTha3j6uxvHToY']", "event_id=$3Ol33IgZ5lBuVwgELhRBYoFLb6GwkFTha3j6uxvHToY,depth=10,body=Historical 51 (batch=0),prevs=['$wUcLTd2pGhliQunqWxFGvT6yiEZrUXA920-hcjaloSw']", "event_id=$wUcLTd2pGhliQunqWxFGvT6yiEZrUXA920-hcjaloSw,depth=10,body=Historical 50 (batch=0),prevs=['$gaO95ghwbwpGttuqBbhMrZJzz9Z440U7L2WOtDy7Hr8']", "event_id=$gaO95ghwbwpGttuqBbhMrZJzz9Z440U7L2WOtDy7Hr8,depth=10,body=Historical 49 (batch=0),prevs=['$4TSr_VvZzCV17JYxAob3DN2nQiYDqgW2S_QkW27v7Bg']", "event_id=$4TSr_VvZzCV17JYxAob3DN2nQiYDqgW2S_QkW27v7Bg,depth=10,body=Historical 48 (batch=0),prevs=['$bJaLResm_4aAwSjZ0POTvtVx4lnV9HKt8HwiYgP21lM']", "event_id=$bJaLResm_4aAwSjZ0POTvtVx4lnV9HKt8HwiYgP21lM,depth=10,body=Historical 47 (batch=0),prevs=['$BouN8LDmbFWiWo9Wc4wQi48YsC-icXewa05KR7byzhM']", "event_id=$BouN8LDmbFWiWo9Wc4wQi48YsC-icXewa05KR7byzhM,depth=10,body=Historical 46 (batch=0),prevs=['$n9Cv6EUu7i25OZLCllOE2PU4xjRthDKhkdtp5OtMuVk']", "event_id=$n9Cv6EUu7i25OZLCllOE2PU4xjRthDKhkdtp5OtMuVk,depth=10,body=Historical 45 (batch=0),prevs=['$MLVpmxVSSRRpxlcTkwIJT3neo7maqOp_ZLvwXRBY8A8']", "event_id=$MLVpmxVSSRRpxlcTkwIJT3neo7maqOp_ZLvwXRBY8A8,depth=10,body=Historical 44 (batch=0),prevs=['$tNv8Se5-lgfW-9MqE_upeBzq3QSPpsn7qOHz49WFs_c']", "event_id=$tNv8Se5-lgfW-9MqE_upeBzq3QSPpsn7qOHz49WFs_c,depth=10,body=Historical 43 (batch=0),prevs=['$XCDKybStk9PsquzrY-biUlKOmjk3wTXnsD1htiMdgAM']", "event_id=$XCDKybStk9PsquzrY-biUlKOmjk3wTXnsD1htiMdgAM,depth=10,body=Historical 42 (batch=0),prevs=['$iUDAE7nT6DQaNJSemwUpqQs8KvKTPFexcBJLTs2-gnY']", "event_id=$iUDAE7nT6DQaNJSemwUpqQs8KvKTPFexcBJLTs2-gnY,depth=10,body=Historical 41 (batch=0),prevs=['$T8NTrB9vQMLW-a0tT3corPl0g6O3UjGIoaHBjkjCD_w']", 
"event_id=$T8NTrB9vQMLW-a0tT3corPl0g6O3UjGIoaHBjkjCD_w,depth=10,body=Historical 40 (batch=0),prevs=['$N7jCLhKYDejMTejtrjJfM2lMRES5OPeowtvN-L4utsE']", "event_id=$N7jCLhKYDejMTejtrjJfM2lMRES5OPeowtvN-L4utsE,depth=10,body=Historical 39 (batch=0),prevs=['$Oy9aTfjL0LSnbF_6MRq7-5baaEbfxyRTK1kbVGeNoaU']", "event_id=$Oy9aTfjL0LSnbF_6MRq7-5baaEbfxyRTK1kbVGeNoaU,depth=10,body=Historical 38 (batch=0),prevs=['$V4qFDnqO1LrboSWwCoQpQ4dXLyrc74qZbbsaHxqAj4Q']", "event_id=$V4qFDnqO1LrboSWwCoQpQ4dXLyrc74qZbbsaHxqAj4Q,depth=10,body=Historical 37 (batch=0),prevs=['$vWdLNDGwj_mBJ6JZ2s01OhsxRuPNSSJAM_bMa6J9Q2c']", "event_id=$vWdLNDGwj_mBJ6JZ2s01OhsxRuPNSSJAM_bMa6J9Q2c,depth=10,body=Historical 36 (batch=0),prevs=['$JZt2EdPtKJX6f2MMbj_gSEseVAMa-H5r_J_yvfhHs5k']", "event_id=$JZt2EdPtKJX6f2MMbj_gSEseVAMa-H5r_J_yvfhHs5k,depth=10,body=Historical 35 (batch=0),prevs=['$NPzUfUgNfvkezhUeLmcp2utVA_XnW8YzDqbZlznsEN0']", "event_id=$NPzUfUgNfvkezhUeLmcp2utVA_XnW8YzDqbZlznsEN0,depth=10,body=Historical 34 (batch=0),prevs=['$NNUjzVtvOSFDYRID_PsMJrUC8gx5p1mPvIoMB7ibDn0']", "event_id=$NNUjzVtvOSFDYRID_PsMJrUC8gx5p1mPvIoMB7ibDn0,depth=10,body=Historical 33 (batch=0),prevs=['$dONbCTAYR0kOMVSmxOnM5aQ4bgx9PbKza98_e2Lxxw4']", "event_id=$dONbCTAYR0kOMVSmxOnM5aQ4bgx9PbKza98_e2Lxxw4,depth=10,body=Historical 32 (batch=0),prevs=['$dwxZHePgzbtQMO3caex0O9Zdl-gT3fpWRZD5dJ43JmE']", "event_id=$dwxZHePgzbtQMO3caex0O9Zdl-gT3fpWRZD5dJ43JmE,depth=10,body=Historical 31 (batch=0),prevs=['$5JOpa9222Y0__S8yfaZBT7ESuRaPeeRz80jleMoa5hE']", "event_id=$5JOpa9222Y0__S8yfaZBT7ESuRaPeeRz80jleMoa5hE,depth=10,body=Historical 30 (batch=0),prevs=['$7yIEFZZFs91wjKScsjbG854DUOqomPnV4iGG-EDWK6w']", "event_id=$7yIEFZZFs91wjKScsjbG854DUOqomPnV4iGG-EDWK6w,depth=10,body=Historical 29 (batch=0),prevs=['$9pzIrDRB1cuweDiHaPGB6AtTtGeGEOmqyoP59UAa0pI']", "event_id=$9pzIrDRB1cuweDiHaPGB6AtTtGeGEOmqyoP59UAa0pI,depth=10,body=Historical 28 (batch=0),prevs=['$EBu0Y6-OdqNUumOlHK__K2eXsN8mAYV21eCRKwGmkxU']", 
"event_id=$EBu0Y6-OdqNUumOlHK__K2eXsN8mAYV21eCRKwGmkxU,depth=10,body=Historical 27 (batch=0),prevs=['$95FzYeNZk31wyfaCojognGOAkzLhHLrv4_j3eoJawwU']", "event_id=$95FzYeNZk31wyfaCojognGOAkzLhHLrv4_j3eoJawwU,depth=10,body=Historical 26 (batch=0),prevs=['$QDGDnSNRXjNZG2lia2gjeD1aEfgfZ0gxI1BzIgjyZq8']", "event_id=$QDGDnSNRXjNZG2lia2gjeD1aEfgfZ0gxI1BzIgjyZq8,depth=10,body=Historical 25 (batch=0),prevs=['$tf1GB8ydPyNTNAaGwcTUXaTOSCmNohK6e6MmOnFwrJA']", "event_id=$tf1GB8ydPyNTNAaGwcTUXaTOSCmNohK6e6MmOnFwrJA,depth=10,body=Historical 24 (batch=0),prevs=['$n40ks3dofH6DioQlfe84n3exnYrboy8GHHYJ_qBFJMg']", "event_id=$n40ks3dofH6DioQlfe84n3exnYrboy8GHHYJ_qBFJMg,depth=10,body=Historical 23 (batch=0),prevs=['$xl8xG3MJiiNP21WNFmDLxOR8Rhy_FLws0snsmDUoAYQ']", "event_id=$xl8xG3MJiiNP21WNFmDLxOR8Rhy_FLws0snsmDUoAYQ,depth=10,body=Historical 22 (batch=0),prevs=['$6MxFONk8t5shoQpjjZyxd0ypmfj76LPs9hWKkPAxMcI']", "event_id=$6MxFONk8t5shoQpjjZyxd0ypmfj76LPs9hWKkPAxMcI,depth=10,body=Historical 21 (batch=0),prevs=['$XAAn4gMQe4ShIZKvZntEUBRljDM4IxHDVeNOXp3K2Bk']", "event_id=$XAAn4gMQe4ShIZKvZntEUBRljDM4IxHDVeNOXp3K2Bk,depth=10,body=Historical 20 (batch=0),prevs=['$Gj3RlKLUMb6hI_JyXdfwuoneWDs1b63qF7WdYYiNDG0']", "event_id=$Gj3RlKLUMb6hI_JyXdfwuoneWDs1b63qF7WdYYiNDG0,depth=10,body=Historical 19 (batch=0),prevs=['$yiRe3l3_APWuZt1wfFBxXQp2orQJZJvwv3yk4QNiPxo']", "event_id=$yiRe3l3_APWuZt1wfFBxXQp2orQJZJvwv3yk4QNiPxo,depth=10,body=Historical 18 (batch=0),prevs=['$v04Ud2zPqFe5M3jPbiLrqQ6r_QXgFlmRTMMLyVvAsN8']", "event_id=$v04Ud2zPqFe5M3jPbiLrqQ6r_QXgFlmRTMMLyVvAsN8,depth=10,body=Historical 17 (batch=0),prevs=['$tO5NiLH2mPBIMRVmrTIza6CUv7v6R0e_tgj0ZKEtpG0']", "event_id=$tO5NiLH2mPBIMRVmrTIza6CUv7v6R0e_tgj0ZKEtpG0,depth=10,body=Historical 16 (batch=0),prevs=['$fyQ-akREpz3vpQNqTZ47yxAs_APAw2L_bQ1iAudsPWk']", "event_id=$fyQ-akREpz3vpQNqTZ47yxAs_APAw2L_bQ1iAudsPWk,depth=10,body=Historical 15 (batch=0),prevs=['$qddW2HRZA0BudaIWuun6Odt6wDbz7ETRJmSY8wtjMnY']", 
"event_id=$qddW2HRZA0BudaIWuun6Odt6wDbz7ETRJmSY8wtjMnY,depth=10,body=Historical 14 (batch=0),prevs=['$fxl98r0B-sa2T8LDR2q6Wi01Zw6xCPZB5FcMf5x_vxI']", "event_id=$fxl98r0B-sa2T8LDR2q6Wi01Zw6xCPZB5FcMf5x_vxI,depth=10,body=Historical 13 (batch=0),prevs=['$1PML_QTo1MK_EuUz3OYyc2vDjfGKKYt-0sUSN9jTeHs']", "event_id=$1PML_QTo1MK_EuUz3OYyc2vDjfGKKYt-0sUSN9jTeHs,depth=10,body=Historical 12 (batch=0),prevs=['$YTz-q29BhUU_Ox7_CR7gao1v7mmMqLwbz51WWI0CjHs']", "event_id=$YTz-q29BhUU_Ox7_CR7gao1v7mmMqLwbz51WWI0CjHs,depth=10,body=Historical 11 (batch=0),prevs=['$Y3HUk4SiCWUrV5V_ZUcwC1IkVZEPM3V8oGWrWKRcv3I']", "event_id=$Y3HUk4SiCWUrV5V_ZUcwC1IkVZEPM3V8oGWrWKRcv3I,depth=10,body=Historical 10 (batch=0),prevs=['$uV5daep6tIdbImy2XEIxdQIuGYc547ulzxMplDyBWfY']", "event_id=$uV5daep6tIdbImy2XEIxdQIuGYc547ulzxMplDyBWfY,depth=10,body=Historical 9 (batch=0),prevs=['$bVKZ6uRROzC7aNcZc9cUsfhHl5dU_-Vm6Qz5V9X8GsI']", "event_id=$bVKZ6uRROzC7aNcZc9cUsfhHl5dU_-Vm6Qz5V9X8GsI,depth=10,body=Historical 8 (batch=0),prevs=['$6zjql7LGgIa3gFpf9owZsKWeFU4FoMUKDtVPpMo7xsM']", "event_id=$6zjql7LGgIa3gFpf9owZsKWeFU4FoMUKDtVPpMo7xsM,depth=10,body=Historical 7 (batch=0),prevs=['$w5t7UuOHrrsfZRebkX4adeEeJpPPGv9y6RyhKpxToMc']", "event_id=$w5t7UuOHrrsfZRebkX4adeEeJpPPGv9y6RyhKpxToMc,depth=10,body=Historical 6 (batch=0),prevs=['$649goNxH4B0DMta75_xs_TTib6C268A2JiUHvpUMu8g']", "event_id=$649goNxH4B0DMta75_xs_TTib6C268A2JiUHvpUMu8g,depth=10,body=Historical 5 (batch=0),prevs=['$mtUwpP0KSNwsn73yxBfQ7imKUe955v_SvI427PC5y2I']", "event_id=$9JuhSj0h0AbHvky0T3tSRfq_lxg1k2LTHY-J-MW6FfA,depth=9,body=Message 1 (eventIDsBefore),prevs=['$FcPmU5Fjg57eO_2J4VOK9jOcqRclEED5B9i1FsHNjaE']" ] ``` --- synapse/handlers/federation.py | 158 ++++++++++++++++++ synapse/handlers/federation_event.py | 86 +++++----- .../databases/main/event_federation.py | 121 +++++++++++++- 3 files changed, 321 insertions(+), 44 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c478e0bc5c21..e28c74daf00a 100644 --- 
a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -16,6 +16,7 @@ """Contains handlers for federation events.""" import logging +from queue import Empty, PriorityQueue from http import HTTPStatus from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union @@ -1041,6 +1042,135 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: else: return [] + async def get_backfill_events( + self, room_id: str, event_id_list: list, limit: int + ) -> List[EventBase]: + event_id_results = set() + + # In a PriorityQueue, the lowest valued entries are retrieved first. + # We're using depth as the priority in the queue and tie-break based on + # stream_ordering. Depth is lowest at the oldest-in-time message and + # highest and newest-in-time message. We add events to the queue with a + # negative depth so that we process the newest-in-time messages first + # going backwards in time. stream_ordering follows the same pattern. + queue = PriorityQueue() + + seed_events = await self.store.get_events_as_list(event_id_list) + for seed_event in seed_events: + # Make sure the seed event actually pertains to this room. We also + # need to make sure the depth is available since our whole DAG + # navigation here depends on depth. + if seed_event.room_id == room_id and seed_event.depth: + queue.put( + ( + -seed_event.depth, + -seed_event.internal_metadata.stream_ordering, + seed_event.event_id, + seed_event.type, + ) + ) + + while not queue.empty() and len(event_id_results) < limit: + try: + _, _, event_id, event_type = queue.get_nowait() + except Empty: + break + + if event_id in event_id_results: + continue + + event_id_results.add(event_id) + + if self.hs.config.experimental.msc2716_enabled: + # Try and find any potential historical batches of message history. + # + # First we look for an insertion event connected to the current + # event (by prev_event). 
If we find any, we'll add them to the queue + # and navigate up the DAG like normal in the next iteration of the + # loop. + connected_insertion_event_backfill_results = ( + await self.store.get_connected_insertion_event_backfill_results( + event_id, limit - len(event_id_results) + ) + ) + logger.debug( + "_get_backfill_events: connected_insertion_event_backfill_results=%s", + connected_insertion_event_backfill_results, + ) + for ( + connected_insertion_event_backfill_item + ) in connected_insertion_event_backfill_results: + if ( + connected_insertion_event_backfill_item.event_id + not in event_id_results + ): + queue.put( + ( + -connected_insertion_event_backfill_item.depth, + -connected_insertion_event_backfill_item.stream_ordering, + connected_insertion_event_backfill_item.event_id, + connected_insertion_event_backfill_item.type, + ) + ) + + # Second, we need to go and try to find any batch events connected + # to a given insertion event (by batch_id). If we find any, we'll + # add them to the queue and navigate up the DAG like normal in the + # next iteration of the loop. 
+ if event_type == EventTypes.MSC2716_INSERTION: + connected_batch_event_backfill_results = ( + await self.store.get_connected_batch_event_backfill_results( + event_id, limit - len(event_id_results) + ) + ) + logger.debug( + "_get_backfill_events: connected_batch_event_backfill_results %s", + connected_batch_event_backfill_results, + ) + for ( + connected_batch_event_backfill_item + ) in connected_batch_event_backfill_results: + if ( + connected_batch_event_backfill_item.event_id + not in event_id_results + ): + queue.put( + ( + -connected_batch_event_backfill_item.depth, + -connected_batch_event_backfill_item.stream_ordering, + connected_batch_event_backfill_item.event_id, + connected_batch_event_backfill_item.type, + ) + ) + + # Now we just look up the DAG by prev_events as normal + connected_prev_event_backfill_results = ( + await self.store.get_connected_prev_event_backfill_results( + event_id, limit - len(event_id_results) + ) + ) + logger.debug( + "_get_backfill_events: prev_event_ids %s", + connected_prev_event_backfill_results, + ) + for ( + connected_prev_event_backfill_item + ) in connected_prev_event_backfill_results: + if connected_prev_event_backfill_item.event_id not in event_id_results: + queue.put( + ( + -connected_prev_event_backfill_item.depth, + -connected_prev_event_backfill_item.stream_ordering, + connected_prev_event_backfill_item.event_id, + connected_prev_event_backfill_item.type, + ) + ) + + events = await self.store.get_events_as_list(event_id_results) + return sorted( + events, key=lambda e: (-e.depth, -e.internal_metadata.stream_ordering) + ) + @log_function async def on_backfill_request( self, origin: str, room_id: str, pdu_list: List[str], limit: int @@ -1053,6 +1183,34 @@ async def on_backfill_request( limit = min(limit, 100) events = await self.store.get_backfill_events(room_id, pdu_list, limit) + logger.info( + "old implementation backfill events=%s", + [ + "event_id=%s,depth=%d,body=%s,prevs=%s\n" + % ( + event.event_id, + 
event.depth, + event.content.get("body", event.type), + event.prev_event_ids(), + ) + for event in events + ], + ) + + events = await self.get_backfill_events(room_id, pdu_list, limit) + logger.info( + "new implementation backfill events=%s", + [ + "event_id=%s,depth=%d,body=%s,prevs=%s\n" + % ( + event.event_id, + event.depth, + event.content.get("body", event.type), + event.prev_event_ids(), + ) + for event in events + ], + ) events = await filter_events_for_server(self.storage, origin, events) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index ab2ed53bce2f..c9060a594f09 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -702,38 +702,38 @@ async def generateEventIdGraphFromEvents( event.event_id ) - # Maybe we can get lucky and save ourselves a lookup - # by checking the events in the backfill first - insertion_event = event_map[ - insertion_event_id - ] or await self._store.get_event( - insertion_event_id, allow_none=True - ) - - if insertion_event: - # Connect the insertion events' `prev_event` successors - # via fake edges pointing to the insertion event itself - # so the insertion event sorts topologically - # behind-in-time the successor. Nestled perfectly - # between the prev_event and the successor. 
- for insertion_prev_event_id in insertion_event.prev_event_ids(): - successor_event_ids = successor_event_id_map[ - insertion_prev_event_id - ] - logger.info( - "insertion_event_id=%s successor_event_ids=%s", - insertion_event_id, - successor_event_ids, - ) - if successor_event_ids: - for successor_event_id in successor_event_ids: - # Don't add itself back as a successor - if successor_event_id != insertion_event_id: - # Fake edge to point the successor back - # at the insertion event - event_id_graph.setdefault( - successor_event_id, [] - ).append(insertion_event_id) + # # Maybe we can get lucky and save ourselves a lookup + # # by checking the events in the backfill first + # insertion_event = event_map[ + # insertion_event_id + # ] or await self._store.get_event( + # insertion_event_id, allow_none=True + # ) + + # if insertion_event: + # # Connect the insertion events' `prev_event` successors + # # via fake edges pointing to the insertion event itself + # # so the insertion event sorts topologically + # # behind-in-time the successor. Nestled perfectly + # # between the prev_event and the successor. 
+ # for insertion_prev_event_id in insertion_event.prev_event_ids(): + # successor_event_ids = successor_event_id_map[ + # insertion_prev_event_id + # ] + # logger.info( + # "insertion_event_id=%s successor_event_ids=%s", + # insertion_event_id, + # successor_event_ids, + # ) + # if successor_event_ids: + # for successor_event_id in successor_event_ids: + # # Don't add itself back as a successor + # if successor_event_id != insertion_event_id: + # # Fake edge to point the successor back + # # at the insertion event + # event_id_graph.setdefault( + # successor_event_id, [] + # ).append(insertion_event_id) # TODO: We also need to add fake edges to connect the oldest-in-time messages # in the batch to the event we branched off of, see https://github.com/matrix-org/synapse/pull/11114#discussion_r739300985 @@ -773,17 +773,17 @@ async def _process_pulled_events( # We want to sort these by depth so we process them and # tell clients about them in order. - # sorted_events = sorted(events, key=lambda x: x.depth) - - # We want to sort topologically so we process them and tell clients - # about them in order. - sorted_events = [] - event_ids = [event.event_id for event in events] - event_map = {event.event_id: event for event in events} - event_id_graph = await self.generateEventIdGraphFromEvents(events) - for event_id in sorted_topologically(event_ids, event_id_graph): - sorted_events.append(event_map[event_id]) - sorted_events = reversed(sorted_events) + sorted_events = sorted(events, key=lambda x: x.depth) + + # # We want to sort topologically so we process them and tell clients + # # about them in order. 
+ # sorted_events = [] + # event_ids = [event.event_id for event in events] + # event_map = {event.event_id: event for event in events} + # event_id_graph = await self.generateEventIdGraphFromEvents(events) + # for event_id in sorted_topologically(event_ids, event_id_graph): + # sorted_events.append(event_map[event_id]) + # sorted_events = reversed(sorted_events) logger.info( "backfill sorted_events=%s", diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 4a4d35f77c5e..a569e8146ab1 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple, NamedTuple from prometheus_client import Counter, Gauge @@ -53,6 +53,14 @@ logger = logging.getLogger(__name__) +# All the info we need while iterating the DAG while backfilling +class BackfillQueueNavigationItem(NamedTuple): + depth: int + stream_ordering: int + event_id: str + type: str + + class _NoChainCoverIndex(Exception): def __init__(self, room_id: str): super().__init__("Unexpectedly no chain cover for events in %s" % (room_id,)) @@ -987,6 +995,117 @@ def get_forward_extremeties_for_room_txn(txn): "get_forward_extremeties_for_room", get_forward_extremeties_for_room_txn ) + async def get_connected_insertion_event_backfill_results( + self, event_id: str, limit: int + ) -> List[BackfillQueueNavigationItem]: + def _get_connected_insertion_event_backfill_results_txn(txn): + # Look for the "insertion" events connected to the given event_id + connected_insertion_event_query = """ + SELECT e.depth, e.stream_ordering, i.event_id, e.type FROM insertion_event_edges AS i + /* Get the depth of the insertion event from the events table */ + INNER JOIN events AS e USING 
(event_id) + /* Find an insertion event which points via prev_events to the given event_id */ + WHERE i.insertion_prev_event_id = ? + LIMIT ? + """ + + txn.execute( + connected_insertion_event_query, + (event_id, limit), + ) + connected_insertion_event_id_results = txn.fetchall() + return [ + BackfillQueueNavigationItem( + depth=row[0], + stream_ordering=row[1], + event_id=row[2], + type=row[3], + ) + for row in connected_insertion_event_id_results + ] + + return await self.db_pool.runInteraction( + "get_connected_insertion_event_backfill_results", + _get_connected_insertion_event_backfill_results_txn, + ) + + async def get_connected_batch_event_backfill_results( + self, insertion_event_id: str, limit: int + ) -> List[BackfillQueueNavigationItem]: + def _get_connected_batch_event_backfill_results_txn(txn): + # Find any batch connections of a given insertion event + batch_connection_query = """ + SELECT e.depth, e.stream_ordering, c.event_id, e.type FROM insertion_events AS i + /* Find the batch that connects to the given insertion event */ + INNER JOIN batch_events AS c + ON i.next_batch_id = c.batch_id + /* Get the depth of the batch start event from the events table */ + INNER JOIN events AS e USING (event_id) + /* Find an insertion event which matches the given event_id */ + WHERE i.event_id = ? + LIMIT ? 
+ """ + + # Find any batch connections for the given insertion event + txn.execute( + batch_connection_query, + (insertion_event_id, limit), + ) + batch_start_event_id_results = txn.fetchall() + return [ + BackfillQueueNavigationItem( + depth=row[0], + stream_ordering=row[1], + event_id=row[2], + type=row[3], + ) + for row in batch_start_event_id_results + ] + + return await self.db_pool.runInteraction( + "get_connected_batch_event_backfill_results", + _get_connected_batch_event_backfill_results_txn, + ) + + async def get_connected_prev_event_backfill_results( + self, event_id: str, limit: int + ) -> List[BackfillQueueNavigationItem]: + def _get_connected_prev_event_backfill_results_txn(txn): + # Look for the prev_event_id connected to the given event_id + connected_prev_event_query = """ + SELECT depth, stream_ordering, prev_event_id, events.type FROM event_edges + /* Get the depth and stream_ordering of the prev_event_id from the events table */ + INNER JOIN events + ON prev_event_id = events.event_id + /* Look for an edge which matches the given event_id */ + WHERE event_edges.event_id = ? + AND event_edges.is_state = ? + /* Because we can have many events at the same depth, + * we want to also tie-break and sort on stream_ordering */ + ORDER BY depth DESC, stream_ordering DESC + LIMIT ? + """ + + txn.execute( + connected_prev_event_query, + (event_id, False, limit), + ) + prev_event_id_results = txn.fetchall() + return [ + BackfillQueueNavigationItem( + depth=row[0], + stream_ordering=row[1], + event_id=row[2], + type=row[3], + ) + for row in prev_event_id_results + ] + + return await self.db_pool.runInteraction( + "get_connected_prev_event_backfill_results", + _get_connected_prev_event_backfill_results_txn, + ) + async def get_backfill_events(self, room_id: str, event_list: list, limit: int): """Get a list of Events for a given topic that occurred before (and including) the events in event_list. 
Return a list of max size `limit` From e0ff66dfd9949d83d9be13f62faf96b68446065e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 29 Oct 2021 17:27:38 -0500 Subject: [PATCH 24/47] Fix backfill being able to cleanly branch into history and back to "live" Fix https://github.com/matrix-org/synapse/pull/11114#discussion_r731363046 - When we see a connected insertion event indicating a branch of history, we now don't accidentally leak the base event we're branching from in the backfill chunk until all of the events in the historical brach are exhausted. - Backfill will now check whether the federating homeserver asking has a given insertion event when we see one connected to the DAG. This allows backfill to initially decide to give all the historical messages in the branch, then next time when the base event we branched from comes up in backfill again, it sees that federated homeserver already has the insertion event, and we can continue down the normal DAG prev_event path instead. Example Backfill response (in order yay): Overview: ``` eventIDsAfter batch0 batch1 eventIDsBefore ``` ``` new implementation backfill events(100)=[ "event_id=$q3w2dF5mWBkgUF1ADdVgS_bX7YQuqODJ2HC33bln4Ho,depth=11,body=Message 1 (eventIDsAfter),prevs=['$Com3-ZIR35FvYcBQvHdpxGsS1iW-DO_Z5x1zZ5R5Az8']", "event_id=$Com3-ZIR35FvYcBQvHdpxGsS1iW-DO_Z5x1zZ5R5Az8,depth=10,body=Message 0 (eventIDsAfter),prevs=['$T5-OUXAW3CCYAHskqk5s0V2ZzJoQy5D8lQJ7LZFWvTI']", "event_id=$78Yy5bqa9XuQ5fhcw9AF36Xf4W5EDcjsraSInif7nYU,depth=10,body=org.matrix.msc2716.insertion,prevs=['$T5-OUXAW3CCYAHskqk5s0V2ZzJoQy5D8lQJ7LZFWvTI']", "event_id=$GEzF4euV8StAtdCDCtWmpdhxRE3KQWgreyr1_ePqlpM,depth=10,body=org.matrix.msc2716.batch,prevs=['$ot97LEPqM55EV1ka8YM445c6q-ny4jBBRxjgQnGxihQ']", "event_id=$ot97LEPqM55EV1ka8YM445c6q-ny4jBBRxjgQnGxihQ,depth=10,body=Historical 99 (batch=0),prevs=['$i53GQczFiX-tqXgcxudp5AWR9C-A0yJiET_ZXe9D49I']", "event_id=$i53GQczFiX-tqXgcxudp5AWR9C-A0yJiET_ZXe9D49I,depth=10,body=Historical 98 
(batch=0),prevs=['$lqjhSBLzbi2WfWNAIm1mngStqsPfQJUwPO0NAFtB8lo']", "event_id=$lqjhSBLzbi2WfWNAIm1mngStqsPfQJUwPO0NAFtB8lo,depth=10,body=Historical 97 (batch=0),prevs=['$k8OC791p_vB-Hz36Byn-o3vK9x_rc0qkVufBpxYRcaA']", "event_id=$k8OC791p_vB-Hz36Byn-o3vK9x_rc0qkVufBpxYRcaA,depth=10,body=Historical 96 (batch=0),prevs=['$9obrnkfP2HpDfKUkWyHrtHML5_31MZ3T9sKQOb6eUV8']", "event_id=$9obrnkfP2HpDfKUkWyHrtHML5_31MZ3T9sKQOb6eUV8,depth=10,body=Historical 95 (batch=0),prevs=['$PSNLMD1RhBvR9ffpbEOKtUHE9PUUiacJ2g8_DA9nSjM']", "event_id=$PSNLMD1RhBvR9ffpbEOKtUHE9PUUiacJ2g8_DA9nSjM,depth=10,body=Historical 94 (batch=0),prevs=['$vGkUiP049MLeapDGHegqdiGZRkuc6WxEJ-iOAN7mqr4']", "event_id=$vGkUiP049MLeapDGHegqdiGZRkuc6WxEJ-iOAN7mqr4,depth=10,body=Historical 93 (batch=0),prevs=['$reYXzhTXHlqqk6XMLK_mEIFQGIesn4NJ70JYa7EziQc']", "event_id=$reYXzhTXHlqqk6XMLK_mEIFQGIesn4NJ70JYa7EziQc,depth=10,body=Historical 92 (batch=0),prevs=['$dg8ZPPPqh1SbIhcg-GMvEGz49h53TUMoZ8yO4zbfA4U']", "event_id=$dg8ZPPPqh1SbIhcg-GMvEGz49h53TUMoZ8yO4zbfA4U,depth=10,body=Historical 91 (batch=0),prevs=['$QW7wied5SyHtJElJTo4SUkc-zNLlAh6tbCiAa5EJXg4']", "event_id=$QW7wied5SyHtJElJTo4SUkc-zNLlAh6tbCiAa5EJXg4,depth=10,body=Historical 90 (batch=0),prevs=['$COQuVmW8z0geXFZ2fPdo_kW0q1NK1LLZSI7fZNIwvgM']", "event_id=$COQuVmW8z0geXFZ2fPdo_kW0q1NK1LLZSI7fZNIwvgM,depth=10,body=Historical 89 (batch=0),prevs=['$04k2wOdm-L41JDQ73KPpDe1njG3L9FYjOsNVamFsDQ0']", "event_id=$04k2wOdm-L41JDQ73KPpDe1njG3L9FYjOsNVamFsDQ0,depth=10,body=Historical 88 (batch=0),prevs=['$70BsBGyeg4Pf3VhGhXjtk4N5XKNNSit4vjnrirwTmic']", "event_id=$70BsBGyeg4Pf3VhGhXjtk4N5XKNNSit4vjnrirwTmic,depth=10,body=Historical 87 (batch=0),prevs=['$Y_6AET6hMZY42rPki4r0GxMfs8E_TCJyepNL1Vfwnh4']", "event_id=$Y_6AET6hMZY42rPki4r0GxMfs8E_TCJyepNL1Vfwnh4,depth=10,body=Historical 86 (batch=0),prevs=['$jSk0bO6PFUcqxHsjTvFPyJZoXL4N9nEGex7I-yyvBj8']", "event_id=$jSk0bO6PFUcqxHsjTvFPyJZoXL4N9nEGex7I-yyvBj8,depth=10,body=Historical 85 
(batch=0),prevs=['$wPV_BB23RIolkHz0yGTvwt1-TJbqpGPqZc33vR5DUMw']", "event_id=$wPV_BB23RIolkHz0yGTvwt1-TJbqpGPqZc33vR5DUMw,depth=10,body=Historical 84 (batch=0),prevs=['$X2FJ_Jmoy5sx9u0UWTN1bkds6diO0uDxxRGco4r9Msw']", "event_id=$X2FJ_Jmoy5sx9u0UWTN1bkds6diO0uDxxRGco4r9Msw,depth=10,body=Historical 83 (batch=0),prevs=['$jrmzKa3aDmpZg5HgDHQqQ_lgl9L4rDWiWk7xd1igfAE']", "event_id=$jrmzKa3aDmpZg5HgDHQqQ_lgl9L4rDWiWk7xd1igfAE,depth=10,body=Historical 82 (batch=0),prevs=['$8Ov9djlzX3ixwCWaULf6-SJNBuV8eeslfBVvt5b1Uds']", "event_id=$8Ov9djlzX3ixwCWaULf6-SJNBuV8eeslfBVvt5b1Uds,depth=10,body=Historical 81 (batch=0),prevs=['$Xflk7Gara-ISOAblxE3Vh5qbRDxJal8NLWSD6BC3wfY']", "event_id=$Xflk7Gara-ISOAblxE3Vh5qbRDxJal8NLWSD6BC3wfY,depth=10,body=Historical 80 (batch=0),prevs=['$cd4vq5xB0kqHa7HziMmI1U5njSwDr6NOz0qkvCrUeiA']", "event_id=$cd4vq5xB0kqHa7HziMmI1U5njSwDr6NOz0qkvCrUeiA,depth=10,body=Historical 79 (batch=0),prevs=['$xblwBKeHnnXlcK9nusq0mWw712br9ySr3elpt5vqYG8']", "event_id=$xblwBKeHnnXlcK9nusq0mWw712br9ySr3elpt5vqYG8,depth=10,body=Historical 78 (batch=0),prevs=['$cAnav2OUp8QsmQRbPc-PcX6A4rRW9UUsGFEGb1lgSyI']", "event_id=$cAnav2OUp8QsmQRbPc-PcX6A4rRW9UUsGFEGb1lgSyI,depth=10,body=Historical 77 (batch=0),prevs=['$3janYqE-QmrenClP9K2jxr-TM2AvG5H-kcgkM6F9UgA']", "event_id=$3janYqE-QmrenClP9K2jxr-TM2AvG5H-kcgkM6F9UgA,depth=10,body=Historical 76 (batch=0),prevs=['$UqMZVF90ZR0kzwt5q2noHEGCS8UFp_IAN4EbiMYVl4o']", "event_id=$UqMZVF90ZR0kzwt5q2noHEGCS8UFp_IAN4EbiMYVl4o,depth=10,body=Historical 75 (batch=0),prevs=['$D6IL83wLN4nkNwwikin8olaPEZCpzUh7-1PAOdD7HSY']", "event_id=$D6IL83wLN4nkNwwikin8olaPEZCpzUh7-1PAOdD7HSY,depth=10,body=Historical 74 (batch=0),prevs=['$MWVNPcLeQ_INUKlCdoOaySGlGeP5WwDt0myistqnTVk']", "event_id=$MWVNPcLeQ_INUKlCdoOaySGlGeP5WwDt0myistqnTVk,depth=10,body=Historical 73 (batch=0),prevs=['$Co1eNgILBaxBR_Gt7Vz_tIs0LutTc9CPvtYRyElNMj8']", "event_id=$Co1eNgILBaxBR_Gt7Vz_tIs0LutTc9CPvtYRyElNMj8,depth=10,body=Historical 72 
(batch=0),prevs=['$ahJyDyEY1Oo8OAkrMZAeYDeTWNzx0B36GsQxcZWchFE']", "event_id=$ahJyDyEY1Oo8OAkrMZAeYDeTWNzx0B36GsQxcZWchFE,depth=10,body=Historical 71 (batch=0),prevs=['$JfGy5KmG-3KFMJFfcHsci6YvhvLq9pWok6GgcZlaXX8']", "event_id=$JfGy5KmG-3KFMJFfcHsci6YvhvLq9pWok6GgcZlaXX8,depth=10,body=Historical 70 (batch=0),prevs=['$VEfiWOEPPyCpN9z305k4IJySpEj5tl4InJb5mhFs7EA']", "event_id=$VEfiWOEPPyCpN9z305k4IJySpEj5tl4InJb5mhFs7EA,depth=10,body=Historical 69 (batch=0),prevs=['$FZswJ09DFvJSzGDFiA3JIdLuzFnG_TfObeEOES1fVTA']", "event_id=$FZswJ09DFvJSzGDFiA3JIdLuzFnG_TfObeEOES1fVTA,depth=10,body=Historical 68 (batch=0),prevs=['$fnzsLJ8duVIJ_S74QIozUaEwOp4G7j7fN5yRV-bzDHQ']", "event_id=$fnzsLJ8duVIJ_S74QIozUaEwOp4G7j7fN5yRV-bzDHQ,depth=10,body=Historical 67 (batch=0),prevs=['$vBWTNsLkJHP9bK0L1Wx3HdND6Rh8HhPmxBtXP7klwvE']", "event_id=$vBWTNsLkJHP9bK0L1Wx3HdND6Rh8HhPmxBtXP7klwvE,depth=10,body=Historical 66 (batch=0),prevs=['$y5PGi9vY9UcTEwTzeE18JB6-bjhf1GVeSEOph4ko-SE']", "event_id=$y5PGi9vY9UcTEwTzeE18JB6-bjhf1GVeSEOph4ko-SE,depth=10,body=Historical 65 (batch=0),prevs=['$7pM8GWYvhzxZ3zGkc76RLtsw7w02IT-sAiV8JkKeQCU']", "event_id=$7pM8GWYvhzxZ3zGkc76RLtsw7w02IT-sAiV8JkKeQCU,depth=10,body=Historical 64 (batch=0),prevs=['$wKaeK8hHhwLb1zHZaRJFE1C-n2kOtWnf076sNeWqaMc']", "event_id=$wKaeK8hHhwLb1zHZaRJFE1C-n2kOtWnf076sNeWqaMc,depth=10,body=Historical 63 (batch=0),prevs=['$8OHC-bni0whbdGfis5cfWL3--JACd0fx-pUBs5LuBXM']", "event_id=$8OHC-bni0whbdGfis5cfWL3--JACd0fx-pUBs5LuBXM,depth=10,body=Historical 62 (batch=0),prevs=['$nZ1WO04o85g1lkLQYlb72bZfx195YwuA-mQ3SsLEsuI']", "event_id=$nZ1WO04o85g1lkLQYlb72bZfx195YwuA-mQ3SsLEsuI,depth=10,body=Historical 61 (batch=0),prevs=['$rdA6ksM5YpvBrm9vHREi-d7x5AJCPZzrAAqxNN5I2ms']", "event_id=$rdA6ksM5YpvBrm9vHREi-d7x5AJCPZzrAAqxNN5I2ms,depth=10,body=Historical 60 (batch=0),prevs=['$v-3prgvdtmX8ySVzXby4Lw6zrncPxKdiLrDnM94y3K4']", "event_id=$v-3prgvdtmX8ySVzXby4Lw6zrncPxKdiLrDnM94y3K4,depth=10,body=Historical 59 
(batch=0),prevs=['$ahhYJXd-_gSSZ_mROiEvBI37nYgYCi4WsrvG8sfEQ6g']", "event_id=$ahhYJXd-_gSSZ_mROiEvBI37nYgYCi4WsrvG8sfEQ6g,depth=10,body=Historical 58 (batch=0),prevs=['$JHFT5ugpxzbU6MmFJZ3wXN0hK33EX3-9As0MemxjsX0']", "event_id=$JHFT5ugpxzbU6MmFJZ3wXN0hK33EX3-9As0MemxjsX0,depth=10,body=Historical 57 (batch=0),prevs=['$JYLcmedG2JdmtBQnnXbIEI34QyeaJrMGLAJt_Mi_vuY']", "event_id=$JYLcmedG2JdmtBQnnXbIEI34QyeaJrMGLAJt_Mi_vuY,depth=10,body=Historical 56 (batch=0),prevs=['$nFeaEcPilAYFoXcid_5mz5jDEqIyvTZLYX4erRf2jlM']", "event_id=$nFeaEcPilAYFoXcid_5mz5jDEqIyvTZLYX4erRf2jlM,depth=10,body=Historical 55 (batch=0),prevs=['$ig5cEWTkkNG-ox84UUjG35G6WEHu-qpqAwYKQSOwHxQ']", "event_id=$ig5cEWTkkNG-ox84UUjG35G6WEHu-qpqAwYKQSOwHxQ,depth=10,body=Historical 54 (batch=0),prevs=['$2MDm0b5ztPCyLlAThSBT38Ie3F0ZG3sAE0r3to3yhtc']", "event_id=$2MDm0b5ztPCyLlAThSBT38Ie3F0ZG3sAE0r3to3yhtc,depth=10,body=Historical 53 (batch=0),prevs=['$gZ_e1ws9bh_uKzY4UskhVCO4IFikMi-waYooJbSJAOI']", "event_id=$gZ_e1ws9bh_uKzY4UskhVCO4IFikMi-waYooJbSJAOI,depth=10,body=Historical 52 (batch=0),prevs=['$pOBdCutp83GhxQkP9_tCyYeaN7YvEo0fuf5MiRsu_ew']", "event_id=$pOBdCutp83GhxQkP9_tCyYeaN7YvEo0fuf5MiRsu_ew,depth=10,body=Historical 51 (batch=0),prevs=['$HCC02P42Zfcn-vYaZrhQu1IdHOMVQrsc6XsfIGPrWNE']", "event_id=$HCC02P42Zfcn-vYaZrhQu1IdHOMVQrsc6XsfIGPrWNE,depth=10,body=Historical 50 (batch=0),prevs=['$R4sIu0EhjycD9c7qHb1Sfdw34PqzIFOiTzCsPs_Z-Fk']", "event_id=$R4sIu0EhjycD9c7qHb1Sfdw34PqzIFOiTzCsPs_Z-Fk,depth=10,body=Historical 49 (batch=0),prevs=['$Gdo7IK1ZUK3pHvHvOETByF8J_PLqVP6BDk1qI_3AZ90']", "event_id=$Gdo7IK1ZUK3pHvHvOETByF8J_PLqVP6BDk1qI_3AZ90,depth=10,body=Historical 48 (batch=0),prevs=['$VJ5qpulzoG4vkhkWnbwpgtBMHAMQSmhUB76lrI_2KXc']", "event_id=$VJ5qpulzoG4vkhkWnbwpgtBMHAMQSmhUB76lrI_2KXc,depth=10,body=Historical 47 (batch=0),prevs=['$-h0DqfdRxhEi6RMiaoKEHqzRDqA5Qy8ZskPGAp5rUI0']", "event_id=$-h0DqfdRxhEi6RMiaoKEHqzRDqA5Qy8ZskPGAp5rUI0,depth=10,body=Historical 46 
(batch=0),prevs=['$_L-oB9rLJsXOnJkjnUQNDUL1C3jw2-YNbTBDiKG958Q']", "event_id=$_L-oB9rLJsXOnJkjnUQNDUL1C3jw2-YNbTBDiKG958Q,depth=10,body=Historical 45 (batch=0),prevs=['$iQeszxPn42bPGUQknk5TCYIWxVqnGDG-h4Bk1JFH_-4']", "event_id=$iQeszxPn42bPGUQknk5TCYIWxVqnGDG-h4Bk1JFH_-4,depth=10,body=Historical 44 (batch=0),prevs=['$raFQyAel7PT5FUymMwF3SIOXxuMpX7aJyRFDfsyFTpo']", "event_id=$raFQyAel7PT5FUymMwF3SIOXxuMpX7aJyRFDfsyFTpo,depth=10,body=Historical 43 (batch=0),prevs=['$bYfZTSvNiI6nOE7NvnwsaJBlDt8eGifjNWiBoOd-4JQ']", "event_id=$bYfZTSvNiI6nOE7NvnwsaJBlDt8eGifjNWiBoOd-4JQ,depth=10,body=Historical 42 (batch=0),prevs=['$iflmqGrv2ERr58y8QWMW8cAYjIwctoOqc_S8Zpxf2LE']", "event_id=$iflmqGrv2ERr58y8QWMW8cAYjIwctoOqc_S8Zpxf2LE,depth=10,body=Historical 41 (batch=0),prevs=['$84bhYzG__UwK-Rc1T4UGKzQUcLGOTMB9cWikG6Np_hM']", "event_id=$84bhYzG__UwK-Rc1T4UGKzQUcLGOTMB9cWikG6Np_hM,depth=10,body=Historical 40 (batch=0),prevs=['$xqS8HMfkjk_9j2aHTisNv8oeocOq38nxxU8MrepqgMU']", "event_id=$xqS8HMfkjk_9j2aHTisNv8oeocOq38nxxU8MrepqgMU,depth=10,body=Historical 39 (batch=0),prevs=['$EChQxibb9iOk8GqNnZRL3dQ0EfcXJpbl942hiEf1uxo']", "event_id=$EChQxibb9iOk8GqNnZRL3dQ0EfcXJpbl942hiEf1uxo,depth=10,body=Historical 38 (batch=0),prevs=['$bzBMsp8mhPIhbhv8RBjSP8zwvJ6ix2HYoEeourkeRJE']", "event_id=$bzBMsp8mhPIhbhv8RBjSP8zwvJ6ix2HYoEeourkeRJE,depth=10,body=Historical 37 (batch=0),prevs=['$lDNy6ds20jjt077dA_f9nFJrAu6reMkA-a9cZaW6cpI']", "event_id=$lDNy6ds20jjt077dA_f9nFJrAu6reMkA-a9cZaW6cpI,depth=10,body=Historical 36 (batch=0),prevs=['$yMQY-EQTPizmc_vOIsWIcY-Acp7dBNJew27NlfDHduo']", "event_id=$yMQY-EQTPizmc_vOIsWIcY-Acp7dBNJew27NlfDHduo,depth=10,body=Historical 35 (batch=0),prevs=['$Y9NaQWbjDCwqc8TxXbCduEp2FWGNDwUA_ElYWF148T8']", "event_id=$Y9NaQWbjDCwqc8TxXbCduEp2FWGNDwUA_ElYWF148T8,depth=10,body=Historical 34 (batch=0),prevs=['$_if1SSgNjDIT1YefAeoQ0z2aH6pe2jynJUDraBZOCjc']", "event_id=$_if1SSgNjDIT1YefAeoQ0z2aH6pe2jynJUDraBZOCjc,depth=10,body=Historical 33 
(batch=0),prevs=['$_JgwcI13PE8e2MuebXSLBIyd_MaIorlEZtF_z_uIVHo']", "event_id=$_JgwcI13PE8e2MuebXSLBIyd_MaIorlEZtF_z_uIVHo,depth=10,body=Historical 32 (batch=0),prevs=['$CRZ4J3otfULJ7Iw1MaAQvIeakECsoZQs1Ehh-xeaC_k']", "event_id=$CRZ4J3otfULJ7Iw1MaAQvIeakECsoZQs1Ehh-xeaC_k,depth=10,body=Historical 31 (batch=0),prevs=['$qaIW_OYBoW7I03J5DnXMEJkRMKtnOOO5ttQjniYzHnI']", "event_id=$qaIW_OYBoW7I03J5DnXMEJkRMKtnOOO5ttQjniYzHnI,depth=10,body=Historical 30 (batch=0),prevs=['$9VNFmDvQZiJo50kQf1qPTAQyCPsdHjx23qC0DT34wY0']", "event_id=$9VNFmDvQZiJo50kQf1qPTAQyCPsdHjx23qC0DT34wY0,depth=10,body=Historical 29 (batch=0),prevs=['$gFbNkb60KtE_SzR9oi16tpkKE9k4hijC2A6dt66Al4k']", "event_id=$gFbNkb60KtE_SzR9oi16tpkKE9k4hijC2A6dt66Al4k,depth=10,body=Historical 28 (batch=0),prevs=['$Mq-dMSTxKEwHS3gPvYwU-Obqs3-Tp3SWjRPUq2VDvJg']", "event_id=$Mq-dMSTxKEwHS3gPvYwU-Obqs3-Tp3SWjRPUq2VDvJg,depth=10,body=Historical 27 (batch=0),prevs=['$5EC7XJFkyMLzhujTLI_m9UXNEnn-4NKZj-DCouBgyt8']", "event_id=$5EC7XJFkyMLzhujTLI_m9UXNEnn-4NKZj-DCouBgyt8,depth=10,body=Historical 26 (batch=0),prevs=['$UxlPuejLS1jHkUNCMRe2vNmc2tcuut-G2N8q6RFTfVQ']", "event_id=$UxlPuejLS1jHkUNCMRe2vNmc2tcuut-G2N8q6RFTfVQ,depth=10,body=Historical 25 (batch=0),prevs=['$BNqTjoMam0rgznru1lqk1U8cCqlJRA4GwORVLBeFYTU']", "event_id=$BNqTjoMam0rgznru1lqk1U8cCqlJRA4GwORVLBeFYTU,depth=10,body=Historical 24 (batch=0),prevs=['$7SdwqRy6BHBnNb_sQtj2Rph9era4F68ZM6mGPzjJthA']", "event_id=$7SdwqRy6BHBnNb_sQtj2Rph9era4F68ZM6mGPzjJthA,depth=10,body=Historical 23 (batch=0),prevs=['$HiveBkhuxr5NIgx9wGhfrmttnGOi0VpeesmqtjpkqE4']", "event_id=$HiveBkhuxr5NIgx9wGhfrmttnGOi0VpeesmqtjpkqE4,depth=10,body=Historical 22 (batch=0),prevs=['$m7iJp9s_Stodzk1I1Lf8egDLbIdyvgJPjAxomFguHb4']", "event_id=$m7iJp9s_Stodzk1I1Lf8egDLbIdyvgJPjAxomFguHb4,depth=10,body=Historical 21 (batch=0),prevs=['$KRSKtmQJHl80Hn7UR3emVczq4-QgKycVop3lkSeppIs']", "event_id=$KRSKtmQJHl80Hn7UR3emVczq4-QgKycVop3lkSeppIs,depth=10,body=Historical 20 
(batch=0),prevs=['$ZCZKQrCeoKEC6oKqwjhK_8EQqwq75b7fEop-GgYCTiU']", "event_id=$ZCZKQrCeoKEC6oKqwjhK_8EQqwq75b7fEop-GgYCTiU,depth=10,body=Historical 19 (batch=0),prevs=['$9pZEcOtzeGWbnmRS_zAQAhdCIE8DZ6_cJAR0tm2AG1k']", "event_id=$9pZEcOtzeGWbnmRS_zAQAhdCIE8DZ6_cJAR0tm2AG1k,depth=10,body=Historical 18 (batch=0),prevs=['$FFXzuSEKR53_PpJ6EBGp8FEBi5Aig2MIq-F1opufK10']", "event_id=$FFXzuSEKR53_PpJ6EBGp8FEBi5Aig2MIq-F1opufK10,depth=10,body=Historical 17 (batch=0),prevs=['$hr0MEvZerIeshwxLzAGbqMXQGOXU_u9H1wUPtzqa6gY']", "event_id=$hr0MEvZerIeshwxLzAGbqMXQGOXU_u9H1wUPtzqa6gY,depth=10,body=Historical 16 (batch=0),prevs=['$hjP2ryNx-yqznW3EKrBZE1DaGSbvK8lB_3cWUDDGTVk']", "event_id=$hjP2ryNx-yqznW3EKrBZE1DaGSbvK8lB_3cWUDDGTVk,depth=10,body=Historical 15 (batch=0),prevs=['$E24xBJItDTqteFc11s2kk2JuvpQjZ2fd-vEWJ7QS784']", "event_id=$E24xBJItDTqteFc11s2kk2JuvpQjZ2fd-vEWJ7QS784,depth=10,body=Historical 14 (batch=0),prevs=['$eOuhBaOKitgobbzyU5XVieMLzgoCITj9eG-ewSMDB9I']", "event_id=$eOuhBaOKitgobbzyU5XVieMLzgoCITj9eG-ewSMDB9I,depth=10,body=Historical 13 (batch=0),prevs=['$4F4Xhu7QKpLM-n_aFmCLrKRP6zw7lrDLAYFTPKn8jYo']", "event_id=$4F4Xhu7QKpLM-n_aFmCLrKRP6zw7lrDLAYFTPKn8jYo,depth=10,body=Historical 12 (batch=0),prevs=['$e0wqSykOpA8Y0yipT9643SVVQ3DMOnW11yxvqAj9JrE']", "event_id=$e0wqSykOpA8Y0yipT9643SVVQ3DMOnW11yxvqAj9JrE,depth=10,body=Historical 11 (batch=0),prevs=['$u5gxt1eo59odS1XqVS-kV2GcaitrzorppcqVj3scevI']", "event_id=$u5gxt1eo59odS1XqVS-kV2GcaitrzorppcqVj3scevI,depth=10,body=Historical 10 (batch=0),prevs=['$Z1E79_z_FcAnw4G9NOvx3WhyL2ACyYPgy6zLhomKVks']", "event_id=$Z1E79_z_FcAnw4G9NOvx3WhyL2ACyYPgy6zLhomKVks,depth=10,body=Historical 9 (batch=0),prevs=['$USyjgdL7fwX4BvmblVs2cicGO3WDRB-ox56298yZmAA']", "event_id=$USyjgdL7fwX4BvmblVs2cicGO3WDRB-ox56298yZmAA,depth=10,body=Historical 8 (batch=0),prevs=['$tggu2uQEL2BWVJiwqsqwCJUca3SEFeylCAbPw5DrIdo']", "event_id=$tggu2uQEL2BWVJiwqsqwCJUca3SEFeylCAbPw5DrIdo,depth=10,body=Historical 7 
(batch=0),prevs=['$M0iINx5TifxVTJ6pz-lio7xIeSla1RIV_N9WIK5sf4c']", "event_id=$M0iINx5TifxVTJ6pz-lio7xIeSla1RIV_N9WIK5sf4c,depth=10,body=Historical 6 (batch=0),prevs=['$hEs6MN2xyjXqHRcWV53_23NeNm--al9b_5BnMYYDPMY']", "event_id=$hEs6MN2xyjXqHRcWV53_23NeNm--al9b_5BnMYYDPMY,depth=10,body=Historical 5 (batch=0),prevs=['$L0BJu5ZOx2l8AMivXMBCqrvymVu_nCQM4ZO23uv5tBU']", "event_id=$L0BJu5ZOx2l8AMivXMBCqrvymVu_nCQM4ZO23uv5tBU,depth=10,body=Historical 4 (batch=0),prevs=['$V5BZU5LpCbW6XvPLGls0QGRbo98uuf1KpMZ0fg8k_-o']"] new implementation backfill events(100)=[ "event_id=$V5BZU5LpCbW6XvPLGls0QGRbo98uuf1KpMZ0fg8k_-o,depth=10,body=Historical 3 (batch=0),prevs=['$lF2XlCU-QmhC0YF8Wn8Q-EfQXqDxF_usgPIZ28rNhII']", "event_id=$lF2XlCU-QmhC0YF8Wn8Q-EfQXqDxF_usgPIZ28rNhII,depth=10,body=Historical 2 (batch=0),prevs=['$Ghnwsm4CqaWopAMGjKm8piFuTEqSaWgl1X7esubZOco']", "event_id=$Ghnwsm4CqaWopAMGjKm8piFuTEqSaWgl1X7esubZOco,depth=10,body=Historical 1 (batch=0),prevs=['$WRV7zPTr4KknEcYUvPlfxMdxcmlrQSsXAfTTlSBpjko']", "event_id=$WRV7zPTr4KknEcYUvPlfxMdxcmlrQSsXAfTTlSBpjko,depth=10,body=Historical 0 (batch=0),prevs=['$30VmrbC0AauqzWQ9F2g_KTPUdJhH_r79olOmfZYe0Pg']", "event_id=$30VmrbC0AauqzWQ9F2g_KTPUdJhH_r79olOmfZYe0Pg,depth=10,body=org.matrix.msc2716.insertion,prevs=['$an2lLzRXApWnCuRNrb1jTfMY9lWbz909AxyIRWy5-C0']", "event_id=$CmUx1RcXqn9QwVSKFKGmwVXwEFE9sHpnac5xvCgm-Co,depth=10,body=org.matrix.msc2716.batch,prevs=['$Kt2_lR2GlI6YoDY7q1ttaoIUv_s8WsjKgce6PT8utIA']", "event_id=$Kt2_lR2GlI6YoDY7q1ttaoIUv_s8WsjKgce6PT8utIA,depth=10,body=Historical 99 (batch=1),prevs=['$eaZav6ZipscONXQMtzaWQrTmiw9yo6_ifeaiI2VTJJM']", "event_id=$eaZav6ZipscONXQMtzaWQrTmiw9yo6_ifeaiI2VTJJM,depth=10,body=Historical 98 (batch=1),prevs=['$sSIhVeKMkHlwvQmCdUxouZyc0A7oBK_IcTYYPhQHniY']", "event_id=$sSIhVeKMkHlwvQmCdUxouZyc0A7oBK_IcTYYPhQHniY,depth=10,body=Historical 97 (batch=1),prevs=['$tQ6z1P58SRR3qqS6UnjWn85zP6CoU882ehdCkNcwhA8']", "event_id=$tQ6z1P58SRR3qqS6UnjWn85zP6CoU882ehdCkNcwhA8,depth=10,body=Historical 96 
(batch=1),prevs=['$mh2o2cBnVjU5uQr0UAMPAWPpGsRzIKKpzc54Q2Z9JGY']", "event_id=$mh2o2cBnVjU5uQr0UAMPAWPpGsRzIKKpzc54Q2Z9JGY,depth=10,body=Historical 95 (batch=1),prevs=['$O_SPFvep-jjnHsW2484GzRYIDZkJhZYVgmCstDR3Qn0']", "event_id=$O_SPFvep-jjnHsW2484GzRYIDZkJhZYVgmCstDR3Qn0,depth=10,body=Historical 94 (batch=1),prevs=['$MkZU6zPnF5L4Mb9Y0AkOlWSG6S5yKhyVsFPm_WPc7EU']", "event_id=$MkZU6zPnF5L4Mb9Y0AkOlWSG6S5yKhyVsFPm_WPc7EU,depth=10,body=Historical 93 (batch=1),prevs=['$lts-3h-4-93qb12QQ8PwkvSu4bSJVKulvj_N2UPRhvs']", "event_id=$lts-3h-4-93qb12QQ8PwkvSu4bSJVKulvj_N2UPRhvs,depth=10,body=Historical 92 (batch=1),prevs=['$abLL8g1D70BE9EqtgSGoVjtbVK6QKG0KZhgBeJmEAuw']", "event_id=$abLL8g1D70BE9EqtgSGoVjtbVK6QKG0KZhgBeJmEAuw,depth=10,body=Historical 91 (batch=1),prevs=['$H2-ZSHXeJII1Jc3XvEjFfmjGUZgpsmlay7YuKKlF7JQ']", "event_id=$H2-ZSHXeJII1Jc3XvEjFfmjGUZgpsmlay7YuKKlF7JQ,depth=10,body=Historical 90 (batch=1),prevs=['$VsrkCzgyKUyPT4XpS0pa-K-CwRJ7MeNu9ictxOHPZo4']", "event_id=$VsrkCzgyKUyPT4XpS0pa-K-CwRJ7MeNu9ictxOHPZo4,depth=10,body=Historical 89 (batch=1),prevs=['$NSVKzqK5ucXWwWq7j4yfADj5m-8GqWFCWeqo2hvUDWg']", "event_id=$NSVKzqK5ucXWwWq7j4yfADj5m-8GqWFCWeqo2hvUDWg,depth=10,body=Historical 88 (batch=1),prevs=['$y5lPUOYBw0g12eIgDUdTn5QnmXD2QeFks61Ivpha1XA']", "event_id=$y5lPUOYBw0g12eIgDUdTn5QnmXD2QeFks61Ivpha1XA,depth=10,body=Historical 87 (batch=1),prevs=['$BdehWtfVHIxjlD3-pWh-6kcb1w2oPhAhgCWxNUkKAD8']", "event_id=$BdehWtfVHIxjlD3-pWh-6kcb1w2oPhAhgCWxNUkKAD8,depth=10,body=Historical 86 (batch=1),prevs=['$B0afYanjEywl1q1fYru7nL2RmjjZpF0gjTVQ7n9fY6k']", "event_id=$B0afYanjEywl1q1fYru7nL2RmjjZpF0gjTVQ7n9fY6k,depth=10,body=Historical 85 (batch=1),prevs=['$F2cOk9oSDKNSlrMoJioWtLi8AdhMkS-QpVwZw2qhWgM']", "event_id=$F2cOk9oSDKNSlrMoJioWtLi8AdhMkS-QpVwZw2qhWgM,depth=10,body=Historical 84 (batch=1),prevs=['$QGCvZWFsgW4KTskjaSqxjTenOa3TbbpBDWVyXESXDGI']", "event_id=$QGCvZWFsgW4KTskjaSqxjTenOa3TbbpBDWVyXESXDGI,depth=10,body=Historical 83 
(batch=1),prevs=['$5evMhA_hLx02z7ydMm_cRfSUsC9OX0MJLayMydLrzN0']", "event_id=$5evMhA_hLx02z7ydMm_cRfSUsC9OX0MJLayMydLrzN0,depth=10,body=Historical 82 (batch=1),prevs=['$4jmsKkx-6NRVMCflN_s6sOfC053Zw1wOfCCM0bWlIX8']", "event_id=$4jmsKkx-6NRVMCflN_s6sOfC053Zw1wOfCCM0bWlIX8,depth=10,body=Historical 81 (batch=1),prevs=['$sTtp0fKA9BG4GDkJ8CUBQVNqpDDcf_a-PkGhJkGqzrw']", "event_id=$sTtp0fKA9BG4GDkJ8CUBQVNqpDDcf_a-PkGhJkGqzrw,depth=10,body=Historical 80 (batch=1),prevs=['$96go7TqZzPq75eOOLiDY0FkylHrmAGly0MTwX4_McMs']", "event_id=$96go7TqZzPq75eOOLiDY0FkylHrmAGly0MTwX4_McMs,depth=10,body=Historical 79 (batch=1),prevs=['$kUiJWkPHn1nPrUDzZJvWhTz1T8UBVELMlEdJGOEB_Qo']", "event_id=$kUiJWkPHn1nPrUDzZJvWhTz1T8UBVELMlEdJGOEB_Qo,depth=10,body=Historical 78 (batch=1),prevs=['$5GCJZEmchNvy0ikP7K7ZlvFb9FS6aVKRedx-08ZAF3s']", "event_id=$5GCJZEmchNvy0ikP7K7ZlvFb9FS6aVKRedx-08ZAF3s,depth=10,body=Historical 77 (batch=1),prevs=['$A3r2LkTKaHxBQh1Ailt4dCtaESyo53Ss1u1s3QiE_k0']", "event_id=$A3r2LkTKaHxBQh1Ailt4dCtaESyo53Ss1u1s3QiE_k0,depth=10,body=Historical 76 (batch=1),prevs=['$R7RRyXo6Msc3-368vZpiG84DfqOAiKLiSbV0ECGv6Oo']", "event_id=$R7RRyXo6Msc3-368vZpiG84DfqOAiKLiSbV0ECGv6Oo,depth=10,body=Historical 75 (batch=1),prevs=['$jmkYlZN6-oiQOi-bJfs0whVS7kKeJJUN6YJQfubbaEU']", "event_id=$jmkYlZN6-oiQOi-bJfs0whVS7kKeJJUN6YJQfubbaEU,depth=10,body=Historical 74 (batch=1),prevs=['$IuGVv6TZj5ZoOP4HHqZsKPWQ9HDTHS4QV7oFHk4kl7o']", "event_id=$IuGVv6TZj5ZoOP4HHqZsKPWQ9HDTHS4QV7oFHk4kl7o,depth=10,body=Historical 73 (batch=1),prevs=['$sNds_pVGUFf41AJwHmA1Sy7MUX1N9MtHGfw9ye6EItM']", "event_id=$sNds_pVGUFf41AJwHmA1Sy7MUX1N9MtHGfw9ye6EItM,depth=10,body=Historical 72 (batch=1),prevs=['$u0R7zUNs4BpneteeM7PWgGZE4XzQJuAwUtFSQGwTNI0']", "event_id=$u0R7zUNs4BpneteeM7PWgGZE4XzQJuAwUtFSQGwTNI0,depth=10,body=Historical 71 (batch=1),prevs=['$VthGtqcIDd04US7pMNTr0gSmR7PIOPLmDOljx1qNleQ']", "event_id=$VthGtqcIDd04US7pMNTr0gSmR7PIOPLmDOljx1qNleQ,depth=10,body=Historical 70 
(batch=1),prevs=['$qYlzglfNCN1Br-4UkajF4c-SKYI0By5gExUxczeDsg4']", "event_id=$qYlzglfNCN1Br-4UkajF4c-SKYI0By5gExUxczeDsg4,depth=10,body=Historical 69 (batch=1),prevs=['$BsxtZqcTIs1aqFRsXtapKXYF6w2GiUEswpl6kH6S_4o']", "event_id=$BsxtZqcTIs1aqFRsXtapKXYF6w2GiUEswpl6kH6S_4o,depth=10,body=Historical 68 (batch=1),prevs=['$PmOzgtCDJCyGowxB8gWcYbfA2KUX0MCJ3xTE48FhAyY']", "event_id=$PmOzgtCDJCyGowxB8gWcYbfA2KUX0MCJ3xTE48FhAyY,depth=10,body=Historical 67 (batch=1),prevs=['$vZq13XMjZRJdFefxBg0L1U9shZq1TbArtR43YkQG7i8']", "event_id=$vZq13XMjZRJdFefxBg0L1U9shZq1TbArtR43YkQG7i8,depth=10,body=Historical 66 (batch=1),prevs=['$526Rm38ribM0XL3HltTbjVCUa3cDUdBh0V-9AMPtsYY']", "event_id=$526Rm38ribM0XL3HltTbjVCUa3cDUdBh0V-9AMPtsYY,depth=10,body=Historical 65 (batch=1),prevs=['$-sPB5z3uyN5tr4KezPKt6xu_yoaqgSDKhkzTJ0Um7yI']", "event_id=$-sPB5z3uyN5tr4KezPKt6xu_yoaqgSDKhkzTJ0Um7yI,depth=10,body=Historical 64 (batch=1),prevs=['$y5mX5SlScEAyPbUx1UA2-_ml0abbh0nGFYgJzF0XzZ0']", "event_id=$y5mX5SlScEAyPbUx1UA2-_ml0abbh0nGFYgJzF0XzZ0,depth=10,body=Historical 63 (batch=1),prevs=['$erT1ogi6tp9tF33_N3ZBLpIMBXOunszhADMYjs8U5AE']", "event_id=$erT1ogi6tp9tF33_N3ZBLpIMBXOunszhADMYjs8U5AE,depth=10,body=Historical 62 (batch=1),prevs=['$d9qTo4Ray7GJWJPm-CHkfZmZC0WmD8-K6Z1Riyot3fw']", "event_id=$d9qTo4Ray7GJWJPm-CHkfZmZC0WmD8-K6Z1Riyot3fw,depth=10,body=Historical 61 (batch=1),prevs=['$TVgdscs2fWtvx0ak6V0ljabU_4cc6UH18Y1r3OvDLYc']", "event_id=$TVgdscs2fWtvx0ak6V0ljabU_4cc6UH18Y1r3OvDLYc,depth=10,body=Historical 60 (batch=1),prevs=['$eMi_3TfsgYw2Fs99AJpr_9KAapZq3uwtv2fUDQ2weTc']", "event_id=$eMi_3TfsgYw2Fs99AJpr_9KAapZq3uwtv2fUDQ2weTc,depth=10,body=Historical 59 (batch=1),prevs=['$UBL_4l45DMvVTE12gyRaSDMw_fi_biDx1dL_gWz_v-g']", "event_id=$UBL_4l45DMvVTE12gyRaSDMw_fi_biDx1dL_gWz_v-g,depth=10,body=Historical 58 (batch=1),prevs=['$A6-UMuau--ffdnOA0NqWQK9uO0NBCB_rwVCvu_hSAOw']", "event_id=$A6-UMuau--ffdnOA0NqWQK9uO0NBCB_rwVCvu_hSAOw,depth=10,body=Historical 57 
(batch=1),prevs=['$Eia3O1ptxGtKhINTEVX_FxBU3TjL0MBp1XdeFCStcCg']", "event_id=$Eia3O1ptxGtKhINTEVX_FxBU3TjL0MBp1XdeFCStcCg,depth=10,body=Historical 56 (batch=1),prevs=['$Pb5OyRAHNrXDyPt8F1dTWVGi-Zvqkf-HqzCwcu0salY']", "event_id=$Pb5OyRAHNrXDyPt8F1dTWVGi-Zvqkf-HqzCwcu0salY,depth=10,body=Historical 55 (batch=1),prevs=['$c1KPN_U0VF8G3jGnD6UpQob_h9X_KQfXv4lciJUb4PQ']", "event_id=$c1KPN_U0VF8G3jGnD6UpQob_h9X_KQfXv4lciJUb4PQ,depth=10,body=Historical 54 (batch=1),prevs=['$fwWir53iSGaGEMRt-mQe6hYmkwIQGjDojFO08zlzgX4']", "event_id=$fwWir53iSGaGEMRt-mQe6hYmkwIQGjDojFO08zlzgX4,depth=10,body=Historical 53 (batch=1),prevs=['$TmacWSD6J8y8vERZfA-IdRpDeMS_4A1HFNUBg_W5MaA']", "event_id=$TmacWSD6J8y8vERZfA-IdRpDeMS_4A1HFNUBg_W5MaA,depth=10,body=Historical 52 (batch=1),prevs=['$_xBvb_mIUJgyvA-rgBppyjXVZ3SXn03isQ0axMOtThc']", "event_id=$_xBvb_mIUJgyvA-rgBppyjXVZ3SXn03isQ0axMOtThc,depth=10,body=Historical 51 (batch=1),prevs=['$AYD-cd4lrJLbW35mCK8dKZEZI705gOsA1iiyW3uqDWk']", "event_id=$AYD-cd4lrJLbW35mCK8dKZEZI705gOsA1iiyW3uqDWk,depth=10,body=Historical 50 (batch=1),prevs=['$KK1BXAZ3E-U7qKWhO6Y6DfXedQ04BhwuA_bNI5Nee8g']", "event_id=$KK1BXAZ3E-U7qKWhO6Y6DfXedQ04BhwuA_bNI5Nee8g,depth=10,body=Historical 49 (batch=1),prevs=['$_oLPTYdg92q_l3c6vxV4tO9ioR9nPrk1lyb2qiLIPOk']", "event_id=$_oLPTYdg92q_l3c6vxV4tO9ioR9nPrk1lyb2qiLIPOk,depth=10,body=Historical 48 (batch=1),prevs=['$RZ0Mzvftn99rU6XFl32jKHs0R6YYaMPy_HJy41tK4ig']", "event_id=$RZ0Mzvftn99rU6XFl32jKHs0R6YYaMPy_HJy41tK4ig,depth=10,body=Historical 47 (batch=1),prevs=['$mloZAG1tKqfjIBQhJk8Mn7M5ER84cJ7bC0TW3rOsKpU']", "event_id=$mloZAG1tKqfjIBQhJk8Mn7M5ER84cJ7bC0TW3rOsKpU,depth=10,body=Historical 46 (batch=1),prevs=['$RY6YWeTkTc8aQP6ZkPq_vL0qs1kg53QeDcKmrLi0ASw']", "event_id=$RY6YWeTkTc8aQP6ZkPq_vL0qs1kg53QeDcKmrLi0ASw,depth=10,body=Historical 45 (batch=1),prevs=['$5A2e6qJj9n9ejvUW6TcVkm6-cuHghnRKQDW3T3a4eqI']", "event_id=$5A2e6qJj9n9ejvUW6TcVkm6-cuHghnRKQDW3T3a4eqI,depth=10,body=Historical 44 
(batch=1),prevs=['$KDMWnrkM1ezjDXlSrXjvhZS-RRuJc6_hrX-Msh0gP7o']", "event_id=$KDMWnrkM1ezjDXlSrXjvhZS-RRuJc6_hrX-Msh0gP7o,depth=10,body=Historical 43 (batch=1),prevs=['$9Q6SSr_YMaQ6oc9w7lYs-bRw0cNESLPTE45Fg2_Zzv4']", "event_id=$9Q6SSr_YMaQ6oc9w7lYs-bRw0cNESLPTE45Fg2_Zzv4,depth=10,body=Historical 42 (batch=1),prevs=['$b2QYayqOOaCxQqs655Tve2OKeXrT9CJeVIchVUn00hI']", "event_id=$b2QYayqOOaCxQqs655Tve2OKeXrT9CJeVIchVUn00hI,depth=10,body=Historical 41 (batch=1),prevs=['$xQQBp4Dwm-3lxCWkJsck9bXuQoCKsdPIortpc79nTtw']", "event_id=$xQQBp4Dwm-3lxCWkJsck9bXuQoCKsdPIortpc79nTtw,depth=10,body=Historical 40 (batch=1),prevs=['$RgTwPhV6RzdVBmuqoYBNblHPZ-qpYuTCshDBrfpwvds']", "event_id=$RgTwPhV6RzdVBmuqoYBNblHPZ-qpYuTCshDBrfpwvds,depth=10,body=Historical 39 (batch=1),prevs=['$RWyv_RX_EoNzCDnHSCfx6fDnJrhWl46iWYomZSS2GsQ']", "event_id=$RWyv_RX_EoNzCDnHSCfx6fDnJrhWl46iWYomZSS2GsQ,depth=10,body=Historical 38 (batch=1),prevs=['$Sj78rQcr9DuCnhtcYuZjWS6o1ZiFDd3ZQxDtJKxjeNg']", "event_id=$Sj78rQcr9DuCnhtcYuZjWS6o1ZiFDd3ZQxDtJKxjeNg,depth=10,body=Historical 37 (batch=1),prevs=['$KePkqP3sJgnC8wVMyRFd0v7Fsu-8LiLTZbhx0JsWfnY']", "event_id=$KePkqP3sJgnC8wVMyRFd0v7Fsu-8LiLTZbhx0JsWfnY,depth=10,body=Historical 36 (batch=1),prevs=['$QdyPRKCNju-Rzm1u530j3N7AEcRFA5mAO3sVbBZunkQ']", "event_id=$QdyPRKCNju-Rzm1u530j3N7AEcRFA5mAO3sVbBZunkQ,depth=10,body=Historical 35 (batch=1),prevs=['$dd3HF7wo7YRhA6e0j5I9eGZtaH1rMK7dAriiNTQd7-Y']", "event_id=$dd3HF7wo7YRhA6e0j5I9eGZtaH1rMK7dAriiNTQd7-Y,depth=10,body=Historical 34 (batch=1),prevs=['$l6q9M7E_yWQID_v4YZMYZfNevVqhK0Pm6qIDABcx7ag']", "event_id=$l6q9M7E_yWQID_v4YZMYZfNevVqhK0Pm6qIDABcx7ag,depth=10,body=Historical 33 (batch=1),prevs=['$GAdAhlUMns9H8jeNH6yrfNp1c4_-lhQKyBnmAkQNZ5A']", "event_id=$GAdAhlUMns9H8jeNH6yrfNp1c4_-lhQKyBnmAkQNZ5A,depth=10,body=Historical 32 (batch=1),prevs=['$6liLB-IPWtTynCd27je2vCoHfDK_S3pwlRe5GoUsLY8']", "event_id=$6liLB-IPWtTynCd27je2vCoHfDK_S3pwlRe5GoUsLY8,depth=10,body=Historical 31 
(batch=1),prevs=['$9C_ywoPrhy68x4-MurcodrKje7FOw5GuG9yMJ6W2ObM']", "event_id=$9C_ywoPrhy68x4-MurcodrKje7FOw5GuG9yMJ6W2ObM,depth=10,body=Historical 30 (batch=1),prevs=['$QyvDk3PQ4dEDxHb0ihFqAwWpqH-NQ86GV_w5njGNOSY']", "event_id=$QyvDk3PQ4dEDxHb0ihFqAwWpqH-NQ86GV_w5njGNOSY,depth=10,body=Historical 29 (batch=1),prevs=['$c2ee8wW8tAvev6UB1yOZMA56j-BFcxZJQqPbXxiqmeg']", "event_id=$c2ee8wW8tAvev6UB1yOZMA56j-BFcxZJQqPbXxiqmeg,depth=10,body=Historical 28 (batch=1),prevs=['$psirWB7VB7WUVg7IjiG1XX2WMOAiN_FFcdo8crQ3KF0']", "event_id=$psirWB7VB7WUVg7IjiG1XX2WMOAiN_FFcdo8crQ3KF0,depth=10,body=Historical 27 (batch=1),prevs=['$AMyblWA-Jny9Nobd-ghaFit6ZDG6OOg3rTxwEIn6zkQ']", "event_id=$AMyblWA-Jny9Nobd-ghaFit6ZDG6OOg3rTxwEIn6zkQ,depth=10,body=Historical 26 (batch=1),prevs=['$EtDL8yGxA03ZO5pTeztYOQvHxUaMCCz9Nj7oGgM0CL8']", "event_id=$EtDL8yGxA03ZO5pTeztYOQvHxUaMCCz9Nj7oGgM0CL8,depth=10,body=Historical 25 (batch=1),prevs=['$zmqDOTVLDS_khXDmPiPvJ7745wI_yuEAsxS_fyDroIo']", "event_id=$zmqDOTVLDS_khXDmPiPvJ7745wI_yuEAsxS_fyDroIo,depth=10,body=Historical 24 (batch=1),prevs=['$lMcNfcLZKCtmrGISzGo3M_lxJapImAmdky9Lezij6TI']", "event_id=$lMcNfcLZKCtmrGISzGo3M_lxJapImAmdky9Lezij6TI,depth=10,body=Historical 23 (batch=1),prevs=['$i4vpa73GXD7wZB2r2t-8nV--pGq_GdKCotM2-wpVJ10']", "event_id=$i4vpa73GXD7wZB2r2t-8nV--pGq_GdKCotM2-wpVJ10,depth=10,body=Historical 22 (batch=1),prevs=['$9BA_P9uGh-cLjLau5nToTmpsnxdaPkT2RjMf9ZYglNI']", "event_id=$9BA_P9uGh-cLjLau5nToTmpsnxdaPkT2RjMf9ZYglNI,depth=10,body=Historical 21 (batch=1),prevs=['$ySO1DPJTl_IiFwv4d6jbNy43a1go9rD_86A2EPIGVZE']", "event_id=$ySO1DPJTl_IiFwv4d6jbNy43a1go9rD_86A2EPIGVZE,depth=10,body=Historical 20 (batch=1),prevs=['$XOyDGiQEhlEoT8UGl4HIod5Zb29R3Op2BFIaVeiCRyY']", "event_id=$XOyDGiQEhlEoT8UGl4HIod5Zb29R3Op2BFIaVeiCRyY,depth=10,body=Historical 19 (batch=1),prevs=['$XDqJHr3kmi2Z9hpNX2Vz7o524cLEMkjKwk-_V2A3U1U']", "event_id=$XDqJHr3kmi2Z9hpNX2Vz7o524cLEMkjKwk-_V2A3U1U,depth=10,body=Historical 18 
(batch=1),prevs=['$hlXaBZDCY4XSg-qjuzkAWhYqoBpdX8ehIFZd5M_9rRM']", "event_id=$hlXaBZDCY4XSg-qjuzkAWhYqoBpdX8ehIFZd5M_9rRM,depth=10,body=Historical 17 (batch=1),prevs=['$LMLomCl3U7zYXi4sIHHDnfznJkns0-DLRDGMbhJ1AAo']", "event_id=$LMLomCl3U7zYXi4sIHHDnfznJkns0-DLRDGMbhJ1AAo,depth=10,body=Historical 16 (batch=1),prevs=['$d0pjrKi7Ws1QM-XyB1_d9Yrg0AGY8MD6JLJqrnaaoJM']", "event_id=$d0pjrKi7Ws1QM-XyB1_d9Yrg0AGY8MD6JLJqrnaaoJM,depth=10,body=Historical 15 (batch=1),prevs=['$hx2dBPyOh6KnEzdpw7CnvoSOkiYTKJp82MXaWA_77N0']", "event_id=$hx2dBPyOh6KnEzdpw7CnvoSOkiYTKJp82MXaWA_77N0,depth=10,body=Historical 14 (batch=1),prevs=['$-y0R8tPCI4n4L0t2j4Ua3QIfau7n4kqPXgnQjb4e1qk']", "event_id=$-y0R8tPCI4n4L0t2j4Ua3QIfau7n4kqPXgnQjb4e1qk,depth=10,body=Historical 13 (batch=1),prevs=['$Af85_d8_UkHp8r8KzOnjCCI0whyMQB7U3G5u8NlMk4I']", "event_id=$Af85_d8_UkHp8r8KzOnjCCI0whyMQB7U3G5u8NlMk4I,depth=10,body=Historical 12 (batch=1),prevs=['$1nQJC9ULuCVGYWU_umE58wq5wHmmFx2GdgF6nmQt5VU']", "event_id=$1nQJC9ULuCVGYWU_umE58wq5wHmmFx2GdgF6nmQt5VU,depth=10,body=Historical 11 (batch=1),prevs=['$SdAp6h2pjv7aIxDSZJxwQbolYrqzUGf1gHOeBnqAULU']", "event_id=$SdAp6h2pjv7aIxDSZJxwQbolYrqzUGf1gHOeBnqAULU,depth=10,body=Historical 10 (batch=1),prevs=['$YDMkSKMoYDtwMd-nKvgAIk38uzc1UejIyBvXNoJ_RFM']", "event_id=$YDMkSKMoYDtwMd-nKvgAIk38uzc1UejIyBvXNoJ_RFM,depth=10,body=Historical 9 (batch=1),prevs=['$a8nvUo95cOf-cO4hcgfO1sT0kyWKq8XPUDmJe0DbxNE']", "event_id=$a8nvUo95cOf-cO4hcgfO1sT0kyWKq8XPUDmJe0DbxNE,depth=10,body=Historical 8 (batch=1),prevs=['$LunwOlIdMiDjrcQazQ6I3n0T8aLfmSA24fIEeth1hHs']", "event_id=$LunwOlIdMiDjrcQazQ6I3n0T8aLfmSA24fIEeth1hHs,depth=10,body=Historical 7 (batch=1),prevs=['$UOvHOp4SKtZqSDWZvD0s3VGdU23-6YqiWy_742cqY-8']", "event_id=$UOvHOp4SKtZqSDWZvD0s3VGdU23-6YqiWy_742cqY-8,depth=10,body=Historical 6 (batch=1),prevs=['$W-c9syEfTTufHnNBpuMUIk621ecs9W6bTeLW0OgWFfY']\n"] new implementation backfill events(17)=[ "event_id=$W-c9syEfTTufHnNBpuMUIk621ecs9W6bTeLW0OgWFfY,depth=10,body=Historical 5 
(batch=1),prevs=['$Gq8-foARgSoEzcEwXhOsiey2pefpCX3vjnCLtsIdvU0']", "event_id=$Gq8-foARgSoEzcEwXhOsiey2pefpCX3vjnCLtsIdvU0,depth=10,body=Historical 4 (batch=1),prevs=['$toIu3hsoBDXkVJs91nUG0hodf3nuFkbidY7ztVPrtCM']", "event_id=$toIu3hsoBDXkVJs91nUG0hodf3nuFkbidY7ztVPrtCM,depth=10,body=Historical 3 (batch=1),prevs=['$SRcKQ-ggRujFzHbJC44rWvslb4YBKgieWahZakx3zMw']", "event_id=$SRcKQ-ggRujFzHbJC44rWvslb4YBKgieWahZakx3zMw,depth=10,body=Historical 2 (batch=1),prevs=['$Pgqvjn_rkQb6eEdQDeKclO-l_ODlnplF7S-rT1i_-64']", "event_id=$Pgqvjn_rkQb6eEdQDeKclO-l_ODlnplF7S-rT1i_-64,depth=10,body=Historical 1 (batch=1),prevs=['$jEW2Smmgh5tIh1QMtEqmluy-o3FrcVhNAH5e3R4jMoo']", "event_id=$jEW2Smmgh5tIh1QMtEqmluy-o3FrcVhNAH5e3R4jMoo,depth=10,body=Historical 0 (batch=1),prevs=['$1bbHCIrQLToHvum-PGxpIKhCmZ5MYLusAy_gJm7IWTw']", "event_id=$1bbHCIrQLToHvum-PGxpIKhCmZ5MYLusAy_gJm7IWTw,depth=10,body=org.matrix.msc2716.insertion,prevs=['$758TrMVR2MlE4EhpRUvCYZFcp11cbE27edQcwKaUSI8']", "event_id=$T5-OUXAW3CCYAHskqk5s0V2ZzJoQy5D8lQJ7LZFWvTI,depth=9,body=Message 1 (eventIDsBefore),prevs=['$WYEMee2J5PcH1yM2XDnD2XIhyMS_HHDdYXop6BOL69E']", "event_id=$WYEMee2J5PcH1yM2XDnD2XIhyMS_HHDdYXop6BOL69E,depth=8,body=Message 0 (eventIDsBefore),prevs=['$jTD4_ZLQQgXienBOsHMwSqyuuYtXQq4WYj7hnPbRC0o']", "event_id=$jTD4_ZLQQgXienBOsHMwSqyuuYtXQq4WYj7hnPbRC0o,depth=7,body=m.room.member,prevs=['$d9EI1fE4ArZLYeQPs3P6dlqTmtskwjJWtlfNEZTdSoA']", "event_id=$d9EI1fE4ArZLYeQPs3P6dlqTmtskwjJWtlfNEZTdSoA,depth=6,body=m.room.name,prevs=['$VvA6Hhp67rDLix7Pg9tZs6DDDh0gq_Mw-qr9wd5cO2k']", "event_id=$VvA6Hhp67rDLix7Pg9tZs6DDDh0gq_Mw-qr9wd5cO2k,depth=5,body=m.room.history_visibility,prevs=['$7fkTDT3EgbweX381_HInASE-jAzhxcQfkzOWuXxXO7U']", "event_id=$7fkTDT3EgbweX381_HInASE-jAzhxcQfkzOWuXxXO7U,depth=4,body=m.room.join_rules,prevs=['$uvO6_KB-CJtOtomqSgSAWLL59mJK0JW2U33ebFTuoK4']", "event_id=$uvO6_KB-CJtOtomqSgSAWLL59mJK0JW2U33ebFTuoK4,depth=3,body=m.room.power_levels,prevs=['$HjRQ8RpCPPWO4Ufq6bSCTqa_tSlKez15KvZRMpvvskc']", 
"event_id=$HjRQ8RpCPPWO4Ufq6bSCTqa_tSlKez15KvZRMpvvskc,depth=2,body=m.room.member,prevs=['$yXLeDjVhdX5jmjLVxmsxKhG_rjfA9ujL8Cd8eHNha6U']", 'event_id=$758TrMVR2MlE4EhpRUvCYZFcp11cbE27edQcwKaUSI8,depth=1,body=m.room.member,prevs=[]', 'event_id=$yXLeDjVhdX5jmjLVxmsxKhG_rjfA9ujL8Cd8eHNha6U,depth=1,body=m.room.create,prevs=[]' ] ``` --- synapse/federation/federation_client.py | 84 ++++++++--- synapse/handlers/federation.py | 184 +++++++++++++++++------- 2 files changed, 196 insertions(+), 72 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 2ab4dec88fe6..eafffff2bcc5 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -264,6 +264,62 @@ async def backfill( return pdus + async def get_pdu_from_destination_raw( + self, + destination: str, + event_id: str, + room_version: RoomVersion, + outlier: bool = False, + timeout: Optional[int] = None, + ) -> Optional[EventBase]: + """Requests the PDU with given origin and ID from the remote home + server. + + Does not have any caching or rate limiting! + + Args: + destination: Which homeserver to query + event_id: event to fetch + room_version: version of the room + outlier: Indicates whether the PDU is an `outlier`, i.e. if + it's from an arbitrary point in the context as opposed to part + of the current block of PDUs. Defaults to `False` + timeout: How long to try (in ms) each destination for before + moving to the next destination. None indicates no timeout. + + Returns: + The requested PDU, or None if we were unable to find it. 
+ + Raises: + SynapseError, NotRetryingDestination, FederationDeniedError + """ + + signed_pdu = None + + transaction_data = await self.transport_layer.get_event( + destination, event_id, timeout=timeout + ) + + logger.info( + "retrieved event id %s from %s: %r", + event_id, + destination, + transaction_data, + ) + + pdu_list: List[EventBase] = [ + event_from_pdu_json(p, room_version, outlier=outlier) + for p in transaction_data["pdus"] + ] + + if pdu_list and pdu_list[0]: + pdu = pdu_list[0] + + # Check signatures are correct. + signed_pdu = await self._check_sigs_and_hash(room_version, pdu) + + return signed_pdu + async def get_pdu( self, destinations: Iterable[str], @@ -308,30 +364,14 @@ async def get_pdu( continue try: - transaction_data = await self.transport_layer.get_event( - destination, event_id, timeout=timeout - ) - - logger.debug( - "retrieved event id %s from %s: %r", - event_id, - destination, - transaction_data, + signed_pdu = await self.get_pdu_from_destination_raw( + destination=destination, + event_id=event_id, + room_version=room_version, + outlier=outlier, + timeout=timeout, ) - pdu_list: List[EventBase] = [ - event_from_pdu_json(p, room_version, outlier=outlier) - for p in transaction_data["pdus"] - ] - - if pdu_list and pdu_list[0]: - pdu = pdu_list[0] - - # Check signatures are correct. 
- signed_pdu = await self._check_sigs_and_hash(room_version, pdu) - - break - pdu_attempts[destination] = now except SynapseError as e: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e28c74daf00a..c2163c7e53ff 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -62,6 +62,7 @@ from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server +from synapse.storage.databases.main.event_federation import BackfillQueueNavigationItem if TYPE_CHECKING: from synapse.server import HomeServer @@ -1043,8 +1044,16 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: return [] async def get_backfill_events( - self, room_id: str, event_id_list: list, limit: int + self, origin: str, room_id: str, event_id_list: list, limit: int ) -> List[EventBase]: + logger.info( + "get_backfill_events(room_id=%s): seeding backfill with event_id_list=%s limit=%s origin=%s", + room_id, + event_id_list, + limit, + origin, + ) + event_id_results = set() # In a PriorityQueue, the lowest valued entries are retrieved first. @@ -1054,8 +1063,20 @@ async def get_backfill_events( # negative depth so that we process the newest-in-time messages first # going backwards in time. stream_ordering follows the same pattern. queue = PriorityQueue() - seed_events = await self.store.get_events_as_list(event_id_list) + logger.info( + "get_backfill_events(room_id=%s): seed_events=%s", + room_id, + [ + BackfillQueueNavigationItem( + depth=seed_event.depth, + stream_ordering=seed_event.internal_metadata.stream_ordering, + event_id=seed_event.event_id, + type=seed_event.type, + ) + for seed_event in seed_events + ], + ) for seed_event in seed_events: # Make sure the seed event actually pertains to this room. 
We also # need to make sure the depth is available since our whole DAG @@ -1079,8 +1100,7 @@ async def get_backfill_events( if event_id in event_id_results: continue - event_id_results.add(event_id) - + found_undiscovered_connected_historical_messages = False if self.hs.config.experimental.msc2716_enabled: # Try and find any potential historical batches of message history. # @@ -1093,8 +1113,10 @@ async def get_backfill_events( event_id, limit - len(event_id_results) ) ) - logger.debug( - "_get_backfill_events: connected_insertion_event_backfill_results=%s", + logger.info( + "get_backfill_events(room_id=%s): connected_insertion_event_backfill_results(%s)=%s", + room_id, + event_id, connected_insertion_event_backfill_results, ) for ( @@ -1104,15 +1126,63 @@ async def get_backfill_events( connected_insertion_event_backfill_item.event_id not in event_id_results ): - queue.put( - ( - -connected_insertion_event_backfill_item.depth, - -connected_insertion_event_backfill_item.stream_ordering, + # Check whether the insertion event is already on the + # federating homeserver we're trying to send backfill + # events to + room_version = await self.store.get_room_version(room_id) + event_exists_on_remote_server = None + try: + # Because of the nature of backfill giving events to + # the federated homeserver in one chunk and then we + # can possibly query about that same event in the + # next chunk, we need to avoid getting a cached + # response. We want to know *now* whether they have + # backfilled the insertion event. + event_exists_on_remote_server = await self.federation_client.get_pdu_from_destination_raw( + origin, connected_insertion_event_backfill_item.event_id, - connected_insertion_event_backfill_item.type, + room_version=room_version, + outlier=True, + timeout=10000, ) + except Exception as e: + logger.info( + "get_backfill_events(room_id=%s): Failed to fetch insertion event_id=%s from origin=%s but we're just going to assume it's not backfilled there yet. 
error=%s", + room_id, + connected_insertion_event_backfill_item.event_id, + origin, + e, + ) + + logger.info( + "get_backfill_events(room_id=%s): checked if insertion event_id=%s exists on federated homeserver(origin=%s) already? event_exists_on_remote_server=%s", + room_id, + connected_insertion_event_backfill_item.event_id, + origin, + event_exists_on_remote_server, ) + # If the event is already on the federated homeserver, + # we don't need to try to branch off onto this + # historical chain of messages. Below, we will instead + # just go up the `prev_events` as normal. + # + # This is important so that the first time we backfill + # the federated homeserver, we jump off and go down the + # historical branch. But after the historical branch is + # exhausted and the event comes up again in backfill, we + # will choose the "live" DAG. + if not event_exists_on_remote_server: + found_undiscovered_connected_historical_messages = True + queue.put( + ( + -connected_insertion_event_backfill_item.depth, + -connected_insertion_event_backfill_item.stream_ordering, + connected_insertion_event_backfill_item.event_id, + connected_insertion_event_backfill_item.type, + ) + ) + # Second, we need to go and try to find any batch events connected # to a given insertion event (by batch_id). 
If we find any, we'll # add them to the queue and navigate up the DAG like normal in the @@ -1123,8 +1193,10 @@ async def get_backfill_events( event_id, limit - len(event_id_results) ) ) - logger.debug( - "_get_backfill_events: connected_batch_event_backfill_results %s", + logger.info( + "get_backfill_events(room_id=%s): connected_batch_event_backfill_results(%s)=%s", + room_id, + event_id, connected_batch_event_backfill_results, ) for ( @@ -1143,28 +1215,39 @@ async def get_backfill_events( ) ) - # Now we just look up the DAG by prev_events as normal - connected_prev_event_backfill_results = ( - await self.store.get_connected_prev_event_backfill_results( - event_id, limit - len(event_id_results) + # If we found a historical branch of history off of the message lets + # navigate down that in the next iteration of the loop instead of + # the normal prev_event chain. + if not found_undiscovered_connected_historical_messages: + event_id_results.add(event_id) + + # Now we just look up the DAG by prev_events as normal + connected_prev_event_backfill_results = ( + await self.store.get_connected_prev_event_backfill_results( + event_id, limit - len(event_id_results) + ) ) - ) - logger.debug( - "_get_backfill_events: prev_event_ids %s", - connected_prev_event_backfill_results, - ) - for ( - connected_prev_event_backfill_item - ) in connected_prev_event_backfill_results: - if connected_prev_event_backfill_item.event_id not in event_id_results: - queue.put( - ( - -connected_prev_event_backfill_item.depth, - -connected_prev_event_backfill_item.stream_ordering, - connected_prev_event_backfill_item.event_id, - connected_prev_event_backfill_item.type, + logger.info( + "get_backfill_events(room_id=%s): connected_prev_event_backfill_results(%s)=%s", + room_id, + event_id, + connected_prev_event_backfill_results, + ) + for ( + connected_prev_event_backfill_item + ) in connected_prev_event_backfill_results: + if ( + connected_prev_event_backfill_item.event_id + not in 
event_id_results + ): + queue.put( + ( + -connected_prev_event_backfill_item.depth, + -connected_prev_event_backfill_item.stream_ordering, + connected_prev_event_backfill_item.event_id, + connected_prev_event_backfill_item.type, + ) ) - ) events = await self.store.get_events_as_list(event_id_results) return sorted( @@ -1182,24 +1265,25 @@ async def on_backfill_request( # Synapse asks for 100 events per backfill request. Do not allow more. limit = min(limit, 100) - events = await self.store.get_backfill_events(room_id, pdu_list, limit) - logger.info( - "old implementation backfill events=%s", - [ - "event_id=%s,depth=%d,body=%s,prevs=%s\n" - % ( - event.event_id, - event.depth, - event.content.get("body", event.type), - event.prev_event_ids(), - ) - for event in events - ], - ) - - events = await self.get_backfill_events(room_id, pdu_list, limit) + # events = await self.store.get_backfill_events(room_id, pdu_list, limit) + # logger.info( + # "old implementation backfill events=%s", + # [ + # "event_id=%s,depth=%d,body=%s,prevs=%s\n" + # % ( + # event.event_id, + # event.depth, + # event.content.get("body", event.type), + # event.prev_event_ids(), + # ) + # for event in events + # ], + # ) + + events = await self.get_backfill_events(origin, room_id, pdu_list, limit) logger.info( - "new implementation backfill events=%s", + "new implementation backfill events(%d)=%s", + len(events), [ "event_id=%s,depth=%d,body=%s,prevs=%s\n" % ( From 76d454f81dbfe71da063bc33b17dc09870ee53fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 30 Oct 2021 00:46:02 -0500 Subject: [PATCH 25/47] Some backfill receive sorting fixes but not using it yet --- synapse/handlers/federation_event.py | 104 +++++++++++++-------------- 1 file changed, 51 insertions(+), 53 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index c9060a594f09..f219a0f42a74 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py 
@@ -702,41 +702,37 @@ async def generateEventIdGraphFromEvents( event.event_id ) - # # Maybe we can get lucky and save ourselves a lookup - # # by checking the events in the backfill first - # insertion_event = event_map[ - # insertion_event_id - # ] or await self._store.get_event( - # insertion_event_id, allow_none=True - # ) - - # if insertion_event: - # # Connect the insertion events' `prev_event` successors - # # via fake edges pointing to the insertion event itself - # # so the insertion event sorts topologically - # # behind-in-time the successor. Nestled perfectly - # # between the prev_event and the successor. - # for insertion_prev_event_id in insertion_event.prev_event_ids(): - # successor_event_ids = successor_event_id_map[ - # insertion_prev_event_id - # ] - # logger.info( - # "insertion_event_id=%s successor_event_ids=%s", - # insertion_event_id, - # successor_event_ids, - # ) - # if successor_event_ids: - # for successor_event_id in successor_event_ids: - # # Don't add itself back as a successor - # if successor_event_id != insertion_event_id: - # # Fake edge to point the successor back - # # at the insertion event - # event_id_graph.setdefault( - # successor_event_id, [] - # ).append(insertion_event_id) - - # TODO: We also need to add fake edges to connect the oldest-in-time messages - # in the batch to the event we branched off of, see https://github.com/matrix-org/synapse/pull/11114#discussion_r739300985 + # Maybe we can get lucky and save ourselves a lookup + # by checking the events in the backfill first + insertion_event = event_map[ + insertion_event_id + ] or await self._store.get_event( + insertion_event_id, allow_none=True + ) + + if insertion_event: + # Connect the insertion events' `prev_event` successors + # via fake edges pointing to the insertion event itself + # so the insertion event sorts topologically + # behind-in-time the successor. Nestled perfectly + # between the prev_event and the successor. 
+ for insertion_prev_event_id in insertion_event.prev_event_ids(): + successor_event_ids = successor_event_id_map[ + insertion_prev_event_id + ] + if successor_event_ids: + for successor_event_id in successor_event_ids: + # Don't add itself back as a successor + if successor_event_id != insertion_event_id: + # Fake edge to point the successor back + # at the insertion event + event_id_graph.setdefault( + successor_event_id, [] + ).append(insertion_event_id) + + # TODO: We also need to add fake edges to connect insertion events -> to + # the base event in the "live" DAG we branched off of, see scenario 2 + # https://github.com/matrix-org/synapse/pull/11114#discussion_r739300985 return event_id_graph @@ -774,6 +770,9 @@ async def _process_pulled_events( # We want to sort these by depth so we process them and # tell clients about them in order. sorted_events = sorted(events, key=lambda x: x.depth) + for ev in sorted_events: + with nested_logging_context(ev.event_id): + await self._process_pulled_event(origin, ev, backfilled=backfilled) # # We want to sort topologically so we process them and tell clients # # about them in order. 
@@ -783,25 +782,24 @@ async def _process_pulled_events( # event_id_graph = await self.generateEventIdGraphFromEvents(events) # for event_id in sorted_topologically(event_ids, event_id_graph): # sorted_events.append(event_map[event_id]) - # sorted_events = reversed(sorted_events) - - logger.info( - "backfill sorted_events=%s", - [ - "event_id=%s,depth=%d,body=%s,prevs=%s\n" - % ( - event.event_id, - event.depth, - event.content.get("body", event.type), - event.prev_event_ids(), - ) - for event in sorted_events - ], - ) - for ev in sorted_events: - with nested_logging_context(ev.event_id): - await self._process_pulled_event(origin, ev, backfilled=backfilled) + # logger.info( + # "backfill sorted_events=%s", + # [ + # "event_id=%s,depth=%d,body=%s,prevs=%s\n" + # % ( + # event.event_id, + # event.depth, + # event.content.get("body", event.type), + # event.prev_event_ids(), + # ) + # for event in reversed(sorted_events) + # ], + # ) + + # for ev in reversed(sorted_events): + # with nested_logging_context(ev.event_id): + # await self._process_pulled_event(origin, ev, backfilled=backfilled) async def _process_pulled_event( self, origin: str, event: EventBase, backfilled: bool From 35294493104d3c6b3d0f1e186fcf7e356527216f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 30 Oct 2021 01:07:26 -0500 Subject: [PATCH 26/47] Fix lints --- synapse/handlers/federation.py | 4 ++-- synapse/storage/databases/main/event_federation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index c2163c7e53ff..e7570310c58f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -16,8 +16,8 @@ """Contains handlers for federation events.""" import logging -from queue import Empty, PriorityQueue from http import HTTPStatus +from queue import Empty, PriorityQueue from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from signedjson.key import 
decode_verify_key_bytes @@ -57,12 +57,12 @@ ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) +from synapse.storage.databases.main.event_federation import BackfillQueueNavigationItem from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server -from synapse.storage.databases.main.event_federation import BackfillQueueNavigationItem if TYPE_CHECKING: from synapse.server import HomeServer diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index a569e8146ab1..8107cfa53af7 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple, NamedTuple +from typing import Collection, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple from prometheus_client import Counter, Gauge From 321f9ea68b4b87da91dc96c22f7c466a91d46c68 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Nov 2021 17:27:04 -0500 Subject: [PATCH 27/47] Move back to the old get_backfill_events and simplify backfill. We now rely on the marker events to backfill the base insertion event which puts it as a insertion event extremity. This functionality was already in place (see `handle_marker_event`) and was an easy transition. This way, remote federated homeserver will have the insertion extremity to ask about in backfill and goes down the historical branch no problem because of the depth order and the rest of the DAG navigation happens as normal. Yay simplification! 
The key breakthrough was discussing all the ways we can find connected insertion events. https://docs.google.com/document/d/1KCEmpnGr4J-I8EeaVQ8QJZKBDu53ViI7V62y5BzfXr0/edit#bookmark=id.1hbt9acs963h The three options we came up with were: - Find by insertion event prev_events (this is what we were doing before) - Find connected insertion events by depth - Find connected insertion events by the marker event - This made the most sense since we already backfill the insertion event when a marker event is processed (see `handle_marker_event`). - Gets rid of the extra insertion event lookup in backfill because we know it's already backfilled from the marker processing. - And gets rid of the extra federated lookup we added in this PR to ask whether the homeserver requesting backfill already has the insertion event (deciding whether we fork to the history branch before we go down the "live" DAG) --- synapse/handlers/federation.py | 36 +++--- .../databases/main/event_federation.py | 118 ++++++++++-------- 2 files changed, 87 insertions(+), 67 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e7570310c58f..21c615432a6d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1265,25 +1265,9 @@ async def on_backfill_request( # Synapse asks for 100 events per backfill request. Do not allow more. 
limit = min(limit, 100) - # events = await self.store.get_backfill_events(room_id, pdu_list, limit) - # logger.info( - # "old implementation backfill events=%s", - # [ - # "event_id=%s,depth=%d,body=%s,prevs=%s\n" - # % ( - # event.event_id, - # event.depth, - # event.content.get("body", event.type), - # event.prev_event_ids(), - # ) - # for event in events - # ], - # ) - - events = await self.get_backfill_events(origin, room_id, pdu_list, limit) + events = await self.store.get_backfill_events(room_id, pdu_list, limit) logger.info( - "new implementation backfill events(%d)=%s", - len(events), + "old implementation backfill events=%s", [ "event_id=%s,depth=%d,body=%s,prevs=%s\n" % ( @@ -1296,6 +1280,22 @@ async def on_backfill_request( ], ) + # events = await self.get_backfill_events(origin, room_id, pdu_list, limit) + # logger.info( + # "new implementation backfill events(%d)=%s", + # len(events), + # [ + # "event_id=%s,depth=%d,body=%s,prevs=%s\n" + # % ( + # event.event_id, + # event.depth, + # event.content.get("body", event.type), + # event.prev_event_ids(), + # ) + # for event in events + # ], + # ) + events = await filter_events_for_server(self.storage, origin, events) return events diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 8107cfa53af7..5d9fae48e965 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1106,20 +1106,22 @@ def _get_connected_prev_event_backfill_results_txn(txn): _get_connected_prev_event_backfill_results_txn, ) - async def get_backfill_events(self, room_id: str, event_list: list, limit: int): + async def get_backfill_events( + self, room_id: str, seed_event_id_list: list, limit: int + ): """Get a list of Events for a given topic that occurred before (and - including) the events in event_list. Return a list of max size `limit` + including) the events in seed_event_id_list. 
Return a list of max size `limit` Args: room_id - event_list + seed_event_id_list limit """ event_ids = await self.db_pool.runInteraction( "get_backfill_events", self._get_backfill_events, room_id, - event_list, + seed_event_id_list, limit, ) events = await self.get_events_as_list(event_ids) @@ -1127,10 +1129,15 @@ async def get_backfill_events(self, room_id: str, event_list: list, limit: int): events, key=lambda e: (-e.depth, -e.internal_metadata.stream_ordering) ) - def _get_backfill_events(self, txn, room_id, event_list, limit): - logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) + def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): + logger.info( + "_get_backfill_events(room_id=%s): seeding backfill with seed_event_id_list=%s limit=%s", + room_id, + seed_event_id_list, + limit, + ) - event_results = set() + event_id_results = set() # We want to make sure that we do a breadth-first, "depth" ordered # search. @@ -1181,11 +1188,11 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # going backwards in time. stream_ordering follows the same pattern. 
queue = PriorityQueue() - for event_id in event_list: + for seed_event_id in seed_event_id_list: event_lookup_result = self.db_pool.simple_select_one_txn( txn, table="events", - keyvalues={"event_id": event_id, "room_id": room_id}, + keyvalues={"event_id": seed_event_id, "room_id": room_id}, retcols=( "type", "depth", @@ -1194,57 +1201,66 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): allow_none=True, ) + logger.info( + "get_backfill_events(room_id=%s): seed_event_id=%s depth=%s stream_ordering=%s type=%s", + room_id, + seed_event_id, + event_lookup_result["depth"], + event_lookup_result["stream_ordering"], + event_lookup_result["type"], + ) + if event_lookup_result["depth"]: queue.put( ( -event_lookup_result["depth"], -event_lookup_result["stream_ordering"], - event_id, + seed_event_id, event_lookup_result["type"], ) ) - while not queue.empty() and len(event_results) < limit: + while not queue.empty() and len(event_id_results) < limit: try: _, _, event_id, event_type = queue.get_nowait() except Empty: break - if event_id in event_results: + if event_id in event_id_results: continue - event_results.add(event_id) + event_id_results.add(event_id) + # Try and find any potential historical batches of message history. if self.hs.config.experimental.msc2716_enabled: - # Try and find any potential historical batches of message history. - # - # First we look for an insertion event connected to the current - # event (by prev_event). If we find any, we'll add them to the queue - # and navigate up the DAG like normal in the next iteration of the - # loop. 
- txn.execute( - connected_insertion_event_query, - (event_id, limit - len(event_results)), - ) - connected_insertion_event_id_results = txn.fetchall() - logger.debug( - "_get_backfill_events: connected_insertion_event_query %s", - connected_insertion_event_id_results, - ) - for row in connected_insertion_event_id_results: - connected_insertion_event_depth = row[0] - connected_insertion_event_stream_ordering = row[1] - connected_insertion_event_id = row[2] - connected_insertion_event_type = row[3] - if connected_insertion_event_id not in event_results: - queue.put( - ( - -connected_insertion_event_depth, - -connected_insertion_event_stream_ordering, - connected_insertion_event_id, - connected_insertion_event_type, - ) - ) + # # First we look for an insertion event connected to the current + # # event (by prev_event). If we find any, we'll add them to the queue + # # and navigate up the DAG like normal in the next iteration of the + # # loop. + # txn.execute( + # connected_insertion_event_query, + # (event_id, limit - len(event_id_results)), + # ) + # connected_insertion_event_id_results = txn.fetchall() + # logger.debug( + # "_get_backfill_events(room_id=%s): connected_insertion_event_query %s", + # room_id, + # connected_insertion_event_id_results, + # ) + # for row in connected_insertion_event_id_results: + # connected_insertion_event_depth = row[0] + # connected_insertion_event_stream_ordering = row[1] + # connected_insertion_event_id = row[2] + # connected_insertion_event_type = row[3] + # if connected_insertion_event_id not in event_id_results: + # queue.put( + # ( + # -connected_insertion_event_depth, + # -connected_insertion_event_stream_ordering, + # connected_insertion_event_id, + # connected_insertion_event_type, + # ) + # ) # Second, we need to go and try to find any batch events connected # to a given insertion event (by batch_id). 
If we find any, we'll @@ -1254,31 +1270,35 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): # Find any batch connections for the given insertion event txn.execute( batch_connection_query, - (event_id, limit - len(event_results)), + (event_id, limit - len(event_id_results)), ) batch_start_event_id_results = txn.fetchall() logger.debug( - "_get_backfill_events: batch_start_event_id_results %s", + "_get_backfill_events(room_id=%s): batch_start_event_id_results %s", + room_id, batch_start_event_id_results, ) for row in batch_start_event_id_results: - if row[2] not in event_results: + if row[2] not in event_id_results: queue.put((-row[0], -row[1], row[2], row[3])) + # Now we just look up the DAG by prev_events as normal txn.execute( connected_prev_event_query, - (event_id, False, limit - len(event_results)), + (event_id, False, limit - len(event_id_results)), ) prev_event_id_results = txn.fetchall() logger.debug( - "_get_backfill_events: prev_event_ids %s", prev_event_id_results + "_get_backfill_events(room_id=%s): prev_event_ids %s", + room_id, + prev_event_id_results, ) for row in prev_event_id_results: - if row[2] not in event_results: + if row[2] not in event_id_results: queue.put((-row[0], -row[1], row[2], row[3])) - return event_results + return event_id_results async def get_missing_events(self, room_id, earliest_events, latest_events, limit): ids = await self.db_pool.runInteraction( From 15c3282be74fd31e370fa17c7bd5bb0e91f8c910 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Nov 2021 18:18:56 -0500 Subject: [PATCH 28/47] Remove the new backfill implementation and pull some good parts of the refactor --- synapse/handlers/federation.py | 227 ------------- .../databases/main/event_federation.py | 297 ++++++------------ 2 files changed, 103 insertions(+), 421 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 21c615432a6d..2dc5e64a3921 100644 --- a/synapse/handlers/federation.py +++ 
b/synapse/handlers/federation.py @@ -1043,217 +1043,6 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: else: return [] - async def get_backfill_events( - self, origin: str, room_id: str, event_id_list: list, limit: int - ) -> List[EventBase]: - logger.info( - "get_backfill_events(room_id=%s): seeding backfill with event_id_list=%s limit=%s origin=%s", - room_id, - event_id_list, - limit, - origin, - ) - - event_id_results = set() - - # In a PriorityQueue, the lowest valued entries are retrieved first. - # We're using depth as the priority in the queue and tie-break based on - # stream_ordering. Depth is lowest at the oldest-in-time message and - # highest and newest-in-time message. We add events to the queue with a - # negative depth so that we process the newest-in-time messages first - # going backwards in time. stream_ordering follows the same pattern. - queue = PriorityQueue() - seed_events = await self.store.get_events_as_list(event_id_list) - logger.info( - "get_backfill_events(room_id=%s): seed_events=%s", - room_id, - [ - BackfillQueueNavigationItem( - depth=seed_event.depth, - stream_ordering=seed_event.internal_metadata.stream_ordering, - event_id=seed_event.event_id, - type=seed_event.type, - ) - for seed_event in seed_events - ], - ) - for seed_event in seed_events: - # Make sure the seed event actually pertains to this room. We also - # need to make sure the depth is available since our whole DAG - # navigation here depends on depth. 
- if seed_event.room_id == room_id and seed_event.depth: - queue.put( - ( - -seed_event.depth, - -seed_event.internal_metadata.stream_ordering, - seed_event.event_id, - seed_event.type, - ) - ) - - while not queue.empty() and len(event_id_results) < limit: - try: - _, _, event_id, event_type = queue.get_nowait() - except Empty: - break - - if event_id in event_id_results: - continue - - found_undiscovered_connected_historical_messages = False - if self.hs.config.experimental.msc2716_enabled: - # Try and find any potential historical batches of message history. - # - # First we look for an insertion event connected to the current - # event (by prev_event). If we find any, we'll add them to the queue - # and navigate up the DAG like normal in the next iteration of the - # loop. - connected_insertion_event_backfill_results = ( - await self.store.get_connected_insertion_event_backfill_results( - event_id, limit - len(event_id_results) - ) - ) - logger.info( - "get_backfill_events(room_id=%s): connected_insertion_event_backfill_results(%s)=%s", - room_id, - event_id, - connected_insertion_event_backfill_results, - ) - for ( - connected_insertion_event_backfill_item - ) in connected_insertion_event_backfill_results: - if ( - connected_insertion_event_backfill_item.event_id - not in event_id_results - ): - # Check whether the insertion event is already on the - # federating homeserver we're trying to send backfill - # events to - room_version = await self.store.get_room_version(room_id) - event_exists_on_remote_server = None - try: - # Because of the nature of backfill giving events to - # the federated homeserver in one chunk and then we - # can possibly query about that same event in the - # next chunk, we need to avoid getting a cached - # response. We want to know *now* whether they have - # backfilled the insertion event. 
- event_exists_on_remote_server = await self.federation_client.get_pdu_from_destination_raw( - origin, - connected_insertion_event_backfill_item.event_id, - room_version=room_version, - outlier=True, - timeout=10000, - ) - except Exception as e: - logger.info( - "get_backfill_events(room_id=%s): Failed to fetch insertion event_id=%s from origin=%s but we're just going to assume it's not backfilled there yet. error=%s", - room_id, - connected_insertion_event_backfill_item.event_id, - origin, - e, - ) - - logger.info( - "get_backfill_events(room_id=%s): checked if insertion event_id=%s exists on federated homeserver(origin=%s) already? event_exists_on_remote_server=%s", - room_id, - connected_insertion_event_backfill_item.event_id, - origin, - event_exists_on_remote_server, - ) - - # If the event is already on the federated homeserver, - # we don't need to try to branch off onto this - # historical chain of messages. Below, we will instead - # just go up the `prev_events` as normal. - # - # This is important so that the first time we backfill - # the federated homeserver, we jump off and go down the - # historical branch. But after the historical branch is - # exhausted and the event comes up again in backfill, we - # will choose the "live" DAG. - if not event_exists_on_remote_server: - found_undiscovered_connected_historical_messages = True - queue.put( - ( - -connected_insertion_event_backfill_item.depth, - -connected_insertion_event_backfill_item.stream_ordering, - connected_insertion_event_backfill_item.event_id, - connected_insertion_event_backfill_item.type, - ) - ) - - # Second, we need to go and try to find any batch events connected - # to a given insertion event (by batch_id). If we find any, we'll - # add them to the queue and navigate up the DAG like normal in the - # next iteration of the loop. 
- if event_type == EventTypes.MSC2716_INSERTION: - connected_batch_event_backfill_results = ( - await self.store.get_connected_batch_event_backfill_results( - event_id, limit - len(event_id_results) - ) - ) - logger.info( - "get_backfill_events(room_id=%s): connected_batch_event_backfill_results(%s)=%s", - room_id, - event_id, - connected_batch_event_backfill_results, - ) - for ( - connected_batch_event_backfill_item - ) in connected_batch_event_backfill_results: - if ( - connected_batch_event_backfill_item.event_id - not in event_id_results - ): - queue.put( - ( - -connected_batch_event_backfill_item.depth, - -connected_batch_event_backfill_item.stream_ordering, - connected_batch_event_backfill_item.event_id, - connected_batch_event_backfill_item.type, - ) - ) - - # If we found a historical branch of history off of the message lets - # navigate down that in the next iteration of the loop instead of - # the normal prev_event chain. - if not found_undiscovered_connected_historical_messages: - event_id_results.add(event_id) - - # Now we just look up the DAG by prev_events as normal - connected_prev_event_backfill_results = ( - await self.store.get_connected_prev_event_backfill_results( - event_id, limit - len(event_id_results) - ) - ) - logger.info( - "get_backfill_events(room_id=%s): connected_prev_event_backfill_results(%s)=%s", - room_id, - event_id, - connected_prev_event_backfill_results, - ) - for ( - connected_prev_event_backfill_item - ) in connected_prev_event_backfill_results: - if ( - connected_prev_event_backfill_item.event_id - not in event_id_results - ): - queue.put( - ( - -connected_prev_event_backfill_item.depth, - -connected_prev_event_backfill_item.stream_ordering, - connected_prev_event_backfill_item.event_id, - connected_prev_event_backfill_item.type, - ) - ) - - events = await self.store.get_events_as_list(event_id_results) - return sorted( - events, key=lambda e: (-e.depth, -e.internal_metadata.stream_ordering) - ) - @log_function async def 
on_backfill_request( self, origin: str, room_id: str, pdu_list: List[str], limit: int @@ -1280,22 +1069,6 @@ async def on_backfill_request( ], ) - # events = await self.get_backfill_events(origin, room_id, pdu_list, limit) - # logger.info( - # "new implementation backfill events(%d)=%s", - # len(events), - # [ - # "event_id=%s,depth=%d,body=%s,prevs=%s\n" - # % ( - # event.event_id, - # event.depth, - # event.content.get("body", event.type), - # event.prev_event_ids(), - # ) - # for event in events - # ], - # ) - events = await filter_events_for_server(self.storage, origin, events) return events diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 5d9fae48e965..299af0ded2eb 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -995,116 +995,70 @@ def get_forward_extremeties_for_room_txn(txn): "get_forward_extremeties_for_room", get_forward_extremeties_for_room_txn ) - async def get_connected_insertion_event_backfill_results( - self, event_id: str, limit: int - ) -> List[BackfillQueueNavigationItem]: - def _get_connected_insertion_event_backfill_results_txn(txn): - # Look for the "insertion" events connected to the given event_id - connected_insertion_event_query = """ - SELECT e.depth, e.stream_ordering, i.event_id, e.type FROM insertion_event_edges AS i - /* Get the depth of the insertion event from the events table */ - INNER JOIN events AS e USING (event_id) - /* Find an insertion event which points via prev_events to the given event_id */ - WHERE i.insertion_prev_event_id = ? - LIMIT ? 
- """ - - txn.execute( - connected_insertion_event_query, - (event_id, limit), - ) - connected_insertion_event_id_results = txn.fetchall() - return [ - BackfillQueueNavigationItem( - depth=row[0], - stream_ordering=row[1], - event_id=row[2], - type=row[3], - ) - for row in connected_insertion_event_id_results - ] + def _get_connected_batch_event_backfill_results_txn( + self, txn: LoggingTransaction, insertion_event_id: str, limit: int + ): + # Find any batch connections of a given insertion event + batch_connection_query = """ + SELECT e.depth, e.stream_ordering, c.event_id, e.type FROM insertion_events AS i + /* Find the batch that connects to the given insertion event */ + INNER JOIN batch_events AS c + ON i.next_batch_id = c.batch_id + /* Get the depth of the batch start event from the events table */ + INNER JOIN events AS e USING (event_id) + /* Find an insertion event which matches the given event_id */ + WHERE i.event_id = ? + LIMIT ? + """ - return await self.db_pool.runInteraction( - "get_connected_insertion_event_backfill_results", - _get_connected_insertion_event_backfill_results_txn, + # Find any batch connections for the given insertion event + txn.execute( + batch_connection_query, + (insertion_event_id, limit), ) - - async def get_connected_batch_event_backfill_results( - self, insertion_event_id: str, limit: int - ) -> List[BackfillQueueNavigationItem]: - def _get_connected_batch_event_backfill_results_txn(txn): - # Find any batch connections of a given insertion event - batch_connection_query = """ - SELECT e.depth, e.stream_ordering, c.event_id, e.type FROM insertion_events AS i - /* Find the batch that connects to the given insertion event */ - INNER JOIN batch_events AS c - ON i.next_batch_id = c.batch_id - /* Get the depth of the batch start event from the events table */ - INNER JOIN events AS e USING (event_id) - /* Find an insertion event which matches the given event_id */ - WHERE i.event_id = ? - LIMIT ? 
- """ - - # Find any batch connections for the given insertion event - txn.execute( - batch_connection_query, - (insertion_event_id, limit), + batch_start_event_id_results = txn.fetchall() + return [ + BackfillQueueNavigationItem( + depth=row[0], + stream_ordering=row[1], + event_id=row[2], + type=row[3], ) - batch_start_event_id_results = txn.fetchall() - return [ - BackfillQueueNavigationItem( - depth=row[0], - stream_ordering=row[1], - event_id=row[2], - type=row[3], - ) - for row in batch_start_event_id_results - ] + for row in batch_start_event_id_results + ] - return await self.db_pool.runInteraction( - "get_connected_batch_event_backfill_results", - _get_connected_batch_event_backfill_results_txn, - ) - - async def get_connected_prev_event_backfill_results( - self, event_id: str, limit: int - ) -> List[BackfillQueueNavigationItem]: - def _get_connected_prev_event_backfill_results_txn(txn): - # Look for the prev_event_id connected to the given event_id - connected_prev_event_query = """ - SELECT depth, stream_ordering, prev_event_id, events.type FROM event_edges - /* Get the depth and stream_ordering of the prev_event_id from the events table */ - INNER JOIN events - ON prev_event_id = events.event_id - /* Look for an edge which matches the given event_id */ - WHERE event_edges.event_id = ? - AND event_edges.is_state = ? - /* Because we can have many events at the same depth, - * we want to also tie-break and sort on stream_ordering */ - ORDER BY depth DESC, stream_ordering DESC - LIMIT ? 
- """ - - txn.execute( - connected_prev_event_query, - (event_id, False, limit), - ) - prev_event_id_results = txn.fetchall() - return [ - BackfillQueueNavigationItem( - depth=row[0], - stream_ordering=row[1], - event_id=row[2], - type=row[3], - ) - for row in prev_event_id_results - ] + def _get_connected_prev_event_backfill_results_txn( + self, txn: LoggingTransaction, event_id: str, limit: int + ): + # Look for the prev_event_id connected to the given event_id + connected_prev_event_query = """ + SELECT depth, stream_ordering, prev_event_id, events.type FROM event_edges + /* Get the depth and stream_ordering of the prev_event_id from the events table */ + INNER JOIN events + ON prev_event_id = events.event_id + /* Look for an edge which matches the given event_id */ + WHERE event_edges.event_id = ? + AND event_edges.is_state = ? + /* Because we can have many events at the same depth, + * we want to also tie-break and sort on stream_ordering */ + ORDER BY depth DESC, stream_ordering DESC + LIMIT ? + """ - return await self.db_pool.runInteraction( - "get_connected_prev_event_backfill_results", - _get_connected_prev_event_backfill_results_txn, + txn.execute( + connected_prev_event_query, + (event_id, False, limit), ) + prev_event_id_results = txn.fetchall() + return [ + BackfillQueueNavigationItem( + depth=row[0], + stream_ordering=row[1], + event_id=row[2], + type=row[3], + ) + for row in prev_event_id_results + ] async def get_backfill_events( self, room_id: str, seed_event_id_list: list, limit: int @@ -1130,6 +1084,11 @@ async def get_backfill_events( ) def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): + """ + We want to make sure that we do a breadth-first, "depth" ordered search. + We also handle navigating historical branches of history connected by + insertion and batch events. 
+ """ logger.info( "_get_backfill_events(room_id=%s): seeding backfill with seed_event_id_list=%s limit=%s", room_id, @@ -1139,47 +1098,6 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): event_id_results = set() - # We want to make sure that we do a breadth-first, "depth" ordered - # search. - - # Look for the prev_event_id connected to the given event_id - connected_prev_event_query = """ - SELECT depth, stream_ordering, prev_event_id, events.type FROM event_edges - /* Get the depth and stream_ordering of the prev_event_id from the events table */ - INNER JOIN events - ON prev_event_id = events.event_id - /* Look for an edge which matches the given event_id */ - WHERE event_edges.event_id = ? - AND event_edges.is_state = ? - /* Because we can have many events at the same depth, - * we want to also tie-break and sort on stream_ordering */ - ORDER BY depth DESC, stream_ordering DESC - LIMIT ? - """ - - # Look for the "insertion" events connected to the given event_id - connected_insertion_event_query = """ - SELECT e.depth, e.stream_ordering, i.event_id, e.type FROM insertion_event_edges AS i - /* Get the depth of the insertion event from the events table */ - INNER JOIN events AS e USING (event_id) - /* Find an insertion event which points via prev_events to the given event_id */ - WHERE i.insertion_prev_event_id = ? - LIMIT ? - """ - - # Find any batch connections of a given insertion event - batch_connection_query = """ - SELECT e.depth, e.stream_ordering, c.event_id, e.type FROM insertion_events AS i - /* Find the batch that connects to the given insertion event */ - INNER JOIN batch_events AS c - ON i.next_batch_id = c.batch_id - /* Get the depth of the batch start event from the events table */ - INNER JOIN events AS e USING (event_id) - /* Find an insertion event which matches the given event_id */ - WHERE i.event_id = ? - LIMIT ? - """ - # In a PriorityQueue, the lowest valued entries are retrieved first. 
# We're using depth as the priority in the queue and tie-break based on # stream_ordering. Depth is lowest at the oldest-in-time message and @@ -1233,70 +1151,61 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): # Try and find any potential historical batches of message history. if self.hs.config.experimental.msc2716_enabled: - # # First we look for an insertion event connected to the current - # # event (by prev_event). If we find any, we'll add them to the queue - # # and navigate up the DAG like normal in the next iteration of the - # # loop. - # txn.execute( - # connected_insertion_event_query, - # (event_id, limit - len(event_id_results)), - # ) - # connected_insertion_event_id_results = txn.fetchall() - # logger.debug( - # "_get_backfill_events(room_id=%s): connected_insertion_event_query %s", - # room_id, - # connected_insertion_event_id_results, - # ) - # for row in connected_insertion_event_id_results: - # connected_insertion_event_depth = row[0] - # connected_insertion_event_stream_ordering = row[1] - # connected_insertion_event_id = row[2] - # connected_insertion_event_type = row[3] - # if connected_insertion_event_id not in event_id_results: - # queue.put( - # ( - # -connected_insertion_event_depth, - # -connected_insertion_event_stream_ordering, - # connected_insertion_event_id, - # connected_insertion_event_type, - # ) - # ) - - # Second, we need to go and try to find any batch events connected + # We need to go and try to find any batch events connected # to a given insertion event (by batch_id). If we find any, we'll # add them to the queue and navigate up the DAG like normal in the # next iteration of the loop. 
if event_type == EventTypes.MSC2716_INSERTION: # Find any batch connections for the given insertion event - txn.execute( - batch_connection_query, - (event_id, limit - len(event_id_results)), + connected_batch_event_backfill_results = ( + self._get_connected_batch_event_backfill_results_txn( + txn, event_id, limit - len(event_id_results) + ) ) - batch_start_event_id_results = txn.fetchall() logger.debug( - "_get_backfill_events(room_id=%s): batch_start_event_id_results %s", + "_get_backfill_events(room_id=%s): connected_batch_event_backfill_results=%s", room_id, - batch_start_event_id_results, + connected_batch_event_backfill_results, ) - for row in batch_start_event_id_results: - if row[2] not in event_id_results: - queue.put((-row[0], -row[1], row[2], row[3])) + for ( + connected_batch_event_backfill_item + ) in connected_batch_event_backfill_results: + if ( + connected_batch_event_backfill_item.event_id + not in event_id_results + ): + queue.put( + ( + -connected_batch_event_backfill_item.depth, + -connected_batch_event_backfill_item.stream_ordering, + connected_batch_event_backfill_item.event_id, + connected_batch_event_backfill_item.type, + ) + ) # Now we just look up the DAG by prev_events as normal - txn.execute( - connected_prev_event_query, - (event_id, False, limit - len(event_id_results)), + connected_prev_event_backfill_results = ( + self._get_connected_prev_event_backfill_results_txn( + txn, event_id, limit - len(event_id_results) + ) ) - prev_event_id_results = txn.fetchall() logger.debug( - "_get_backfill_events(room_id=%s): prev_event_ids %s", + "_get_backfill_events(room_id=%s): connected_prev_event_backfill_results=%s", room_id, - prev_event_id_results, + connected_prev_event_backfill_results, ) - - for row in prev_event_id_results: - if row[2] not in event_id_results: - queue.put((-row[0], -row[1], row[2], row[3])) + for ( + connected_prev_event_backfill_item + ) in connected_prev_event_backfill_results: + if 
connected_prev_event_backfill_item.event_id not in event_id_results: + queue.put( + ( + -connected_prev_event_backfill_item.depth, + -connected_prev_event_backfill_item.stream_ordering, + connected_prev_event_backfill_item.event_id, + connected_prev_event_backfill_item.type, + ) + ) return event_id_results From 5db717ab8569a9e16cfbb589d50f80bceed00d2a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Nov 2021 21:26:52 -0500 Subject: [PATCH 29/47] Always process marker events regardless if backfilled Before, we could rely on the `connected_insertion_event_query` to navigate the and find the historical branch. But now we solely rely on the marker event to point out the historical branch. So we need to make sure to add the insertion event extremeties whenever we see a marker event. Whether it be a live event or backfilled. --- synapse/handlers/federation.py | 6 +++--- synapse/handlers/federation_event.py | 10 +++++----- synapse/storage/databases/main/event_federation.py | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 2dc5e64a3921..7bc11f736f74 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -142,7 +142,7 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled: Dict[str, int] = {} if self.hs.config.experimental.msc2716_enabled: insertion_events_to_be_backfilled = ( - await self.store.get_insertion_event_backwards_extremities_in_room( + await self.store.get_insertion_event_backward_extremities_in_room( room_id ) ) @@ -1055,8 +1055,8 @@ async def on_backfill_request( limit = min(limit, 100) events = await self.store.get_backfill_events(room_id, pdu_list, limit) - logger.info( - "old implementation backfill events=%s", + logger.debug( + "on_backfill_request: backfill events=%s", [ "event_id=%s,depth=%d,body=%s,prevs=%s\n" % ( diff --git a/synapse/handlers/federation_event.py 
b/synapse/handlers/federation_event.py index f219a0f42a74..9ccf99f5c5de 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -752,9 +752,9 @@ async def _process_pulled_events( backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) """ - - logger.info( - "backfill events=%s", + logger.debug( + "processing pulled backfilled=%s events=%s", + backfilled, [ "event_id=%s,depth=%d,body=%s,prevs=%s\n" % ( @@ -1146,6 +1146,8 @@ async def _process_received_pdu( await self._run_push_actions_and_persist_event(event, context, backfilled) + await self._handle_marker_event(origin, event) + if backfilled: return @@ -1223,8 +1225,6 @@ async def _process_received_pdu( event.sender, ) - await self._handle_marker_event(origin, event) - async def _resync_device(self, sender: str) -> None: """We have detected that the device list for the given user may be out of sync, so we try and resync them. diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 299af0ded2eb..cedeb56abe75 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -735,7 +735,7 @@ def get_oldest_event_ids_with_depth_in_room_txn(txn, room_id): room_id, ) - async def get_insertion_event_backwards_extremities_in_room( + async def get_insertion_event_backward_extremities_in_room( self, room_id ) -> Dict[str, int]: """Get the insertion events we know about that we haven't backfilled yet. 
@@ -752,7 +752,7 @@ async def get_insertion_event_backwards_extremities_in_room( Map from event_id to depth """ - def get_insertion_event_backwards_extremities_in_room_txn(txn, room_id): + def get_insertion_event_backward_extremities_in_room_txn(txn, room_id): sql = """ SELECT b.event_id, MAX(e.depth) FROM insertion_events as i /* We only want insertion events that are also marked as backwards extremities */ @@ -768,8 +768,8 @@ def get_insertion_event_backwards_extremities_in_room_txn(txn, room_id): return dict(txn) return await self.db_pool.runInteraction( - "get_insertion_event_backwards_extremities_in_room", - get_insertion_event_backwards_extremities_in_room_txn, + "get_insertion_event_backward_extremities_in_room", + get_insertion_event_backward_extremities_in_room_txn, room_id, ) @@ -1089,7 +1089,7 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): We also handle navigating historical branches of history connected by insertion and batch events. """ - logger.info( + logger.debug( "_get_backfill_events(room_id=%s): seeding backfill with seed_event_id_list=%s limit=%s", room_id, seed_event_id_list, @@ -1119,8 +1119,8 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): allow_none=True, ) - logger.info( - "get_backfill_events(room_id=%s): seed_event_id=%s depth=%s stream_ordering=%s type=%s", + logger.debug( + "_get_backfill_events(room_id=%s): seed_event_id=%s depth=%s stream_ordering=%s type=%s", room_id, seed_event_id, event_lookup_result["depth"], From e96fd5c3fd23315eee00d525d6e3ed17c502fe78 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Nov 2021 00:51:05 -0500 Subject: [PATCH 30/47] Add comment docs --- .../databases/main/event_federation.py | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index cedeb56abe75..d7bddfc154eb 100644 --- 
a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -997,8 +997,21 @@ def get_forward_extremeties_for_room_txn(txn):
 
     def _get_connected_batch_event_backfill_results_txn(
         self, txn: LoggingTransaction, insertion_event_id: str, limit: int
-    ):
-        # Find any batch connections of a given insertion event
+    ) -> List[BackfillQueueNavigationItem]:
+        """
+        Find any batch connections of a given insertion event.
+        A batch event points at an insertion event via:
+        batch_event.content[MSC2716_BATCH_ID] -> insertion_event.content[MSC2716_NEXT_BATCH_ID]
+
+        Args:
+            txn: The database transaction to use
+            insertion_event_id: The event ID to navigate from. We will find
+                batch events that point back at this insertion event.
+            limit: Max number of event ID's to query for and return
+
+        Returns:
+            List of batch events that the backfill queue can process
+        """
         batch_connection_query = """
             SELECT e.depth, e.stream_ordering, c.event_id, e.type FROM insertion_events AS i
             /* Find the batch that connects to the given insertion event */
@@ -1029,7 +1042,18 @@ def _get_connected_batch_event_backfill_results_txn(
 
     def _get_connected_prev_event_backfill_results_txn(
         self, txn: LoggingTransaction, event_id: str, limit: int
-    ):
+    ) -> List[BackfillQueueNavigationItem]:
+        """
+        Find any events connected by prev_event to the specified event_id.
+ + Args: + txn: The database transaction to use + event_id: The event ID to navigate from + limit: Max number of event ID's to query for and return + + Returns: + List of prev events that the backfill queue can process + """ # Look for the prev_event_id connected to the given event_id connected_prev_event_query = """ SELECT depth, stream_ordering, prev_event_id, events.type FROM event_edges From f3b7b3e882484dcb12a597aee259ff40cf3363fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Nov 2021 01:07:33 -0500 Subject: [PATCH 31/47] Add better explanatory comment See https://github.com/matrix-org/synapse/pull/11114#discussion_r741304767 --- synapse/handlers/federation_event.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 9ccf99f5c5de..f7573c118015 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1474,7 +1474,9 @@ def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: await self.persist_events_and_notify( room_id, tuple(events_to_persist), - # Events we fetch during backfill should be marked as backfilled as well + # Mark these events backfilled as they're historic events that will + # eventually be backfilled. For example, missing events we fetch + # during backfill should be marked as backfilled as well. 
backfilled=True, ) From 7f2105ad0663e4e5798dda87d4fbcbb410e49db8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Nov 2021 01:13:19 -0500 Subject: [PATCH 32/47] Remove topological sort when receiving backfill events See https://github.com/matrix-org/synapse/pull/11114#discussion_r741517138 --- synapse/handlers/federation_event.py | 115 --------------------------- 1 file changed, 115 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f7573c118015..6952c879a85f 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -648,94 +648,6 @@ async def _get_missing_events_for_pdu( logger.info("Got %d prev_events", len(missing_events)) await self._process_pulled_events(origin, missing_events, backfilled=False) - async def generateEventIdGraphFromEvents( - self, events: Iterable[EventBase] - ) -> Dict[str, Iterable[str]]: - event_map = {event.event_id: event for event in events} - - # Since the insertion event we try to reference later on might be in the - # backfill chunk itself, we need to make it easy to lookup. Maps a given - # batch_id to the insertion event. - batch_id_map = { - event.content.get( - EventContentFields.MSC2716_NEXT_BATCH_ID, None - ): event.event_id - for event in events - if event.type == EventTypes.MSC2716_INSERTION - } - - # Map a given event to it's successors (backwards prev_events) - successor_event_id_map = {} - for event in events: - for prev_event_id in event.prev_event_ids(): - successor_event_id_map.setdefault(prev_event_id, []).append( - event.event_id - ) - - event_id_graph = {} - for event in events: - # Assign the real edges to the graph. - # Make a copy so we don't modify the actual prev_events when we extend them below. 
- event_id_graph.setdefault(event.event_id, []).extend( - event.prev_event_ids().copy() - ) - - # We need to make some fake edge connections from the batch event at - # the bottom of the historical batch to the insertion event. This - # way the historical batch topologically sorts in ahead-in-time of - # the event we branched off of. - batch_id = event.content.get(EventContentFields.MSC2716_BATCH_ID, None) - if event.type == EventTypes.MSC2716_BATCH and batch_id: - # Maybe we can get lucky and save ourselves a lookup - # by checking the events in the backfill first - insertion_event_id = batch_id_map[ - batch_id - ] or await self._store.get_insertion_event_id_by_batch_id( - event.room_id, batch_id - ) - - if insertion_event_id: - # Connect the insertion event via a fake edge pointing to the - # batch event so the historical batch topologically sorts - # behind-in-time the insertion event. - event_id_graph.setdefault(insertion_event_id, []).append( - event.event_id - ) - - # Maybe we can get lucky and save ourselves a lookup - # by checking the events in the backfill first - insertion_event = event_map[ - insertion_event_id - ] or await self._store.get_event( - insertion_event_id, allow_none=True - ) - - if insertion_event: - # Connect the insertion events' `prev_event` successors - # via fake edges pointing to the insertion event itself - # so the insertion event sorts topologically - # behind-in-time the successor. Nestled perfectly - # between the prev_event and the successor. 
- for insertion_prev_event_id in insertion_event.prev_event_ids(): - successor_event_ids = successor_event_id_map[ - insertion_prev_event_id - ] - if successor_event_ids: - for successor_event_id in successor_event_ids: - # Don't add itself back as a successor - if successor_event_id != insertion_event_id: - # Fake edge to point the successor back - # at the insertion event - event_id_graph.setdefault( - successor_event_id, [] - ).append(insertion_event_id) - - # TODO: We also need to add fake edges to connect insertion events -> to - # the base event in the "live" DAG we branched off of, see scenario 2 - # https://github.com/matrix-org/synapse/pull/11114#discussion_r739300985 - - return event_id_graph - async def _process_pulled_events( self, origin: str, events: Iterable[EventBase], backfilled: bool ) -> None: @@ -774,33 +686,6 @@ async def _process_pulled_events( with nested_logging_context(ev.event_id): await self._process_pulled_event(origin, ev, backfilled=backfilled) - # # We want to sort topologically so we process them and tell clients - # # about them in order. 
- # sorted_events = [] - # event_ids = [event.event_id for event in events] - # event_map = {event.event_id: event for event in events} - # event_id_graph = await self.generateEventIdGraphFromEvents(events) - # for event_id in sorted_topologically(event_ids, event_id_graph): - # sorted_events.append(event_map[event_id]) - - # logger.info( - # "backfill sorted_events=%s", - # [ - # "event_id=%s,depth=%d,body=%s,prevs=%s\n" - # % ( - # event.event_id, - # event.depth, - # event.content.get("body", event.type), - # event.prev_event_ids(), - # ) - # for event in reversed(sorted_events) - # ], - # ) - - # for ev in reversed(sorted_events): - # with nested_logging_context(ev.event_id): - # await self._process_pulled_event(origin, ev, backfilled=backfilled) - async def _process_pulled_event( self, origin: str, event: EventBase, backfilled: bool ) -> None: From 246278e9a38da7d50d0d49abcbdb06a7215dd972 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Nov 2021 01:29:26 -0500 Subject: [PATCH 33/47] Fix lints --- synapse/handlers/federation.py | 2 -- synapse/handlers/federation_event.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 7bc11f736f74..3f5d1d701fca 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -17,7 +17,6 @@ import logging from http import HTTPStatus -from queue import Empty, PriorityQueue from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from signedjson.key import decode_verify_key_bytes @@ -57,7 +56,6 @@ ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) -from synapse.storage.databases.main.event_federation import BackfillQueueNavigationItem from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util.async_helpers import Linearizer diff --git 
a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 6952c879a85f..2be3ef89a3a6 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -72,7 +72,7 @@ get_domain_from_id, ) from synapse.util.async_helpers import Linearizer, concurrently_execute -from synapse.util.iterutils import batch_iter, sorted_topologically +from synapse.util.iterutils import batch_iter from synapse.util.retryutils import NotRetryingDestination from synapse.util.stringutils import shortstr From bc0ba8c094d012557e0b5393569e86b3798e4d9b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Nov 2021 02:39:39 -0500 Subject: [PATCH 34/47] Protect from no auth events for non-existent provided prev_event --- synapse/rest/client/room_batch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 6f29c56d6440..982763f2d891 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -131,6 +131,14 @@ async def on_POST( prev_event_ids_from_query ) + if not auth_event_ids: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "No auth events found for given prev_event query parameter. The prev_event=%s probably does not exist." + % prev_event_ids_from_query, + errcode=Codes.INVALID_PARAM, + ) + # Create and persist all of the state events that float off on their own # before the batch. These will most likely be all of the invite/member # state events used to auth the upcoming historical messages. 
From 363aed60e747ca36136cbd13345bbdbe72cba184 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Nov 2021 18:31:32 -0500 Subject: [PATCH 35/47] Revert unused refactor to get PDU raw Code split out into https://github.com/matrix-org/synapse/pull/11242 --- synapse/federation/federation_client.py | 84 +++++++------------------ 1 file changed, 22 insertions(+), 62 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 67056be7f738..670186f5482f 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -277,62 +277,6 @@ async def backfill( return pdus - async def get_pdu_from_destination_raw( - self, - destination: str, - event_id: str, - room_version: RoomVersion, - outlier: bool = False, - timeout: Optional[int] = None, - ) -> Optional[EventBase]: - """Requests the PDU with given origin and ID from the remote home - server. - - Does not have any caching or rate limiting! - - Args: - destination: Which homeserver to query - event_id: event to fetch - room_version: version of the room - outlier: Indicates whether the PDU is an `outlier`, i.e. if - it's from an arbitrary point in the context as opposed to part - of the current block of PDUs. Defaults to `False` - timeout: How long to try (in ms) each destination for before - moving to the next destination. None indicates no timeout. - - Returns: - The requested PDU, or None if we were unable to find it. 
- - Raises: - SynapseError, NotRetryingDestination, FederationDeniedError - """ - - signed_pdu = None - - transaction_data = await self.transport_layer.get_event( - destination, event_id, timeout=timeout - ) - - logger.info( - "retrieved event id %s from %s: %r", - event_id, - destination, - transaction_data, - ) - - pdu_list: List[EventBase] = [ - event_from_pdu_json(p, room_version, outlier=outlier) - for p in transaction_data["pdus"] - ] - - if pdu_list and pdu_list[0]: - pdu = pdu_list[0] - - # Check signatures are correct. - signed_pdu = await self._check_sigs_and_hash(room_version, pdu) - - return signed_pdu - async def get_pdu( self, destinations: Iterable[str], @@ -377,14 +321,30 @@ async def get_pdu( continue try: - signed_pdu = await self.get_pdu_from_destination_raw( - destination=destination, - event_id=event_id, - room_version=room_version, - outlier=outlier, - timeout=timeout, + transaction_data = await self.transport_layer.get_event( + destination, event_id, timeout=timeout + ) + + logger.debug( + "retrieved event id %s from %s: %r", + event_id, + destination, + transaction_data, ) + pdu_list: List[EventBase] = [ + event_from_pdu_json(p, room_version, outlier=outlier) + for p in transaction_data["pdus"] + ] + + if pdu_list and pdu_list[0]: + pdu = pdu_list[0] + + # Check signatures are correct. + signed_pdu = await self._check_sigs_and_hash(room_version, pdu) + + break + pdu_attempts[destination] = now except SynapseError as e: From d771fbd3b1f5edde521781cb67c565913f03b253 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 11 Nov 2021 00:49:57 -0600 Subject: [PATCH 36/47] Only run the tests package to get streaming Complement output --- scripts-dev/complement.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 9a69637d79e0..927c69a753d8 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! 
-go test -v -tags synapse_blacklist,msc2946,msc3083,msc2403,msc2716 -count=1 "${EXTRA_COMPLEMENT_ARGS[@]}" ./tests/main_test.go ./tests/msc2716_test.go +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2403,msc2716 -count=1 "${EXTRA_COMPLEMENT_ARGS[@]}" ./tests From b071426a63ef7dffbf25f96c747290117c19bdca Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 15 Dec 2021 23:12:37 -0600 Subject: [PATCH 37/47] Plumb allow_no_prev_events through for MSC2716 --- synapse/handlers/message.py | 15 +++++++++++---- synapse/handlers/room_batch.py | 8 +++++++- synapse/handlers/room_member.py | 22 ++++++++++++++++++++-- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5e3d3886eb1d..3dbd58e12c27 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -490,12 +490,12 @@ async def create_event( requester: Requester, event_dict: dict, txn_id: Optional[str] = None, + allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, require_consent: bool = True, outlier: bool = False, historical: bool = False, - allow_no_prev_events: bool = False, depth: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: """ @@ -510,6 +510,10 @@ async def create_event( requester event_dict: An entire event txn_id + allow_no_prev_events: Whether to allow this event to be created an empty + list of prev_events. Normally this is prohibited just because most + events should have a prev_event and we should only use this in special + cases like MSC2716. prev_event_ids: the forward extremities to use as the prev_events for the new event. 
@@ -604,10 +608,10 @@ async def create_event( event, context = await self.create_new_client_event( builder=builder, requester=requester, + allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, depth=depth, - allow_no_prev_events=allow_no_prev_events, ) # In an ideal world we wouldn't need the second part of this condition. However, @@ -880,16 +884,20 @@ async def create_new_client_event( self, builder: EventBuilder, requester: Optional[Requester] = None, + allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, - allow_no_prev_events: bool = False, ) -> Tuple[EventBase, EventContext]: """Create a new event for a local client Args: builder: requester: + allow_no_prev_events: Whether to allow this event to be created an empty + list of prev_events. Normally this is prohibited just because most + events should have a prev_event and we should only use this in special + cases like MSC2716. prev_event_ids: the forward extremities to use as the prev_events for the new event. @@ -908,7 +916,6 @@ async def create_new_client_event( Returns: Tuple of created event, context """ - # Strip down the auth_event_ids to only what we need to auth the event. 
# For example, we don't need extra m.room.member that don't match event.sender full_state_ids_at_event = None diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index c4d22ad297e1..f09714a85f7d 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -182,7 +182,7 @@ async def persist_state_events_at_start( # bunch of `@mxid joined the room` noise between each batch prev_event_ids_for_state_chain: List[str] = [] - for state_event in state_events_at_start: + for index, state_event in enumerate(state_events_at_start): assert_params_in_dict( state_event, ["type", "origin_server_ts", "content", "sender"] ) @@ -218,6 +218,9 @@ async def persist_state_events_at_start( content=event_dict["content"], outlier=True, historical=True, + # Only the first even in the chain should be floating. + # The rest should hang off each other in a chain. + allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same @@ -238,6 +241,9 @@ async def persist_state_events_at_start( event_dict, outlier=True, historical=True, + # Only the first even in the chain should be floating. + # The rest should hang off each other in a chain. + allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, # Make sure to use a copy of this list because we modify it # later in the loop here. 
Otherwise it will be the same diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6aa910dd10f9..bc6b72b41be9 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -261,7 +261,8 @@ async def _local_membership_update( target: UserID, room_id: str, membership: str, - prev_event_ids: List[str], + allow_no_prev_events: bool = False, + prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, txn_id: Optional[str] = None, ratelimit: bool = True, @@ -279,8 +280,12 @@ async def _local_membership_update( target: room_id: membership: - prev_event_ids: The event IDs to use as the prev events + allow_no_prev_events: Whether to allow this event to be created an empty + list of prev_events. Normally this is prohibited just because most + events should have a prev_event and we should only use this in special + cases like MSC2716. + prev_event_ids: The event IDs to use as the prev events auth_event_ids: The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated @@ -337,6 +342,7 @@ async def _local_membership_update( "membership": membership, }, txn_id=txn_id, + allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, require_consent=require_consent, @@ -439,6 +445,7 @@ async def update_membership( require_consent: bool = True, outlier: bool = False, historical: bool = False, + allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: @@ -463,6 +470,10 @@ async def update_membership( historical: Indicates whether the message is being inserted back in time around some existing events. This is used to skip a few checks and mark the event as backfilled. + allow_no_prev_events: Whether to allow this event to be created an empty + list of prev_events. 
Normally this is prohibited just because most + events should have a prev_event and we should only use this in special + cases like MSC2716. prev_event_ids: The event IDs to use as the prev events auth_event_ids: The event ids to use as the auth_events for the new event. @@ -497,6 +508,7 @@ async def update_membership( require_consent=require_consent, outlier=outlier, historical=historical, + allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, ) @@ -518,6 +530,7 @@ async def update_membership_locked( require_consent: bool = True, outlier: bool = False, historical: bool = False, + allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: @@ -544,6 +557,10 @@ async def update_membership_locked( historical: Indicates whether the message is being inserted back in time around some existing events. This is used to skip a few checks and mark the event as backfilled. + allow_no_prev_events: Whether to allow this event to be created an empty + list of prev_events. Normally this is prohibited just because most + events should have a prev_event and we should only use this in special + cases like MSC2716. prev_event_ids: The event IDs to use as the prev events auth_event_ids: The event ids to use as the auth_events for the new event. 
@@ -667,6 +684,7 @@ async def update_membership_locked( membership=effective_membership_state, txn_id=txn_id, ratelimit=ratelimit, + allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, content=content, From ec33a40201e1086cb5ff0548e409abc35f0ce056 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 15 Dec 2021 23:20:53 -0600 Subject: [PATCH 38/47] Make the historical events float separately from the state chain See https://github.com/matrix-org/synapse/pull/11114#discussion_r760843910 --- synapse/handlers/room_batch.py | 22 +++++++++------------- synapse/rest/client/room_batch.py | 9 --------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index f09714a85f7d..5dcc28620fca 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -218,7 +218,7 @@ async def persist_state_events_at_start( content=event_dict["content"], outlier=True, historical=True, - # Only the first even in the chain should be floating. + # Only the first event in the chain should be floating. # The rest should hang off each other in a chain. allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, @@ -241,7 +241,7 @@ async def persist_state_events_at_start( event_dict, outlier=True, historical=True, - # Only the first even in the chain should be floating. + # Only the first event in the chain should be floating. # The rest should hang off each other in a chain. allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, @@ -263,7 +263,6 @@ async def persist_historical_events( self, events_to_create: List[JsonDict], room_id: str, - initial_prev_event_ids: List[str], inherited_depth: int, auth_event_ids: List[str], app_service_requester: Requester, @@ -279,9 +278,6 @@ async def persist_historical_events( events_to_create: List of historical events to create in JSON dictionary format. 
room_id: Room where you want the events persisted in. - initial_prev_event_ids: These will be the prev_events for the first - event created. Each event created afterwards will point to the - previous event created. inherited_depth: The depth to create the events at (you will probably by calling inherit_depth_from_prev_ids(...)). auth_event_ids: Define which events allow you to create the given @@ -293,11 +289,13 @@ async def persist_historical_events( """ assert app_service_requester.app_service - prev_event_ids = initial_prev_event_ids.copy() + # Make the historical event chain float off on its own which causes the + # HS to ask for the state at the start of the batch later. + prev_event_ids: List[str] = [] event_ids = [] events_to_persist = [] - for ev in events_to_create: + for index, ev in enumerate(events_to_create): assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) assert self.hs.is_mine_id(ev["sender"]), "User must be our own: %s" % ( @@ -321,6 +319,9 @@ async def persist_historical_events( ev["sender"], app_service_requester.app_service ), event_dict, + # Only the first event in the chain should be floating. + # The rest should hang off each other in a chain. + allow_no_prev_events=index == 0, prev_event_ids=event_dict.get("prev_events"), auth_event_ids=auth_event_ids, historical=True, @@ -372,7 +373,6 @@ async def handle_batch_of_events( events_to_create: List[JsonDict], room_id: str, batch_id_to_connect_to: str, - initial_prev_event_ids: List[str], inherited_depth: int, auth_event_ids: List[str], app_service_requester: Requester, @@ -387,9 +387,6 @@ async def handle_batch_of_events( room_id: Room where you want the events created in. batch_id_to_connect_to: The batch_id from the insertion event you want this batch to connect to. - initial_prev_event_ids: These will be the prev_events for the first - event created. Each event created afterwards will point to the - previous event created. 
inherited_depth: The depth to create the events at (you will probably by calling inherit_depth_from_prev_ids(...)). auth_event_ids: Define which events allow you to create the given @@ -438,7 +435,6 @@ async def handle_batch_of_events( event_ids = await self.persist_historical_events( events_to_create=events_to_create, room_id=room_id, - initial_prev_event_ids=initial_prev_event_ids, inherited_depth=inherited_depth, auth_event_ids=auth_event_ids, app_service_requester=app_service_requester, diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index c9509d2ae344..4b6be38327e0 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -205,21 +205,12 @@ async def on_POST( EventContentFields.MSC2716_NEXT_BATCH_ID ] - # Also connect the historical event chain to the end of the floating - # state chain, which causes the HS to ask for the state at the start of - # the batch later. If there is no state chain to connect to, just make - # the insertion event float itself. - prev_event_ids = [] - if len(state_event_ids_at_start): - prev_event_ids = [state_event_ids_at_start[-1]] - # Create and persist all of the historical events as well as insertion # and batch meta events to make the batch navigable in the DAG. 
event_ids, next_batch_id = await self.room_batch_handler.handle_batch_of_events( events_to_create=events_to_create, room_id=room_id, batch_id_to_connect_to=batch_id_to_connect_to, - initial_prev_event_ids=prev_event_ids, inherited_depth=inherited_depth, auth_event_ids=auth_event_ids, app_service_requester=requester, From b99efa8026c4cc1f9dc89dc1465c2976c0bf773f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 16 Dec 2021 15:31:12 -0600 Subject: [PATCH 39/47] Plumb allow_no_prev_events through create_and_send_nonmember_event --- synapse/handlers/message.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3dbd58e12c27..c31e4376c3f7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -768,6 +768,7 @@ async def create_and_send_nonmember_event( self, requester: Requester, event_dict: dict, + allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ratelimit: bool = True, @@ -785,6 +786,10 @@ async def create_and_send_nonmember_event( Args: requester: The requester sending the event. event_dict: An entire event. + allow_no_prev_events: Whether to allow this event to be created an empty + list of prev_events. Normally this is prohibited just because most + events should have a prev_event and we should only use this in special + cases like MSC2716. prev_event_ids: The event IDs to use as the prev events. 
Should normally be left as None to automatically request them From 3810ae10aa5ac60f4c99e8294ce40ccd6043b7cb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 16 Dec 2021 15:51:27 -0600 Subject: [PATCH 40/47] Clarify comments --- synapse/handlers/room_batch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 5dcc28620fca..f8137ec04cc5 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -178,8 +178,9 @@ async def persist_state_events_at_start( state_event_ids_at_start = [] auth_event_ids = initial_auth_event_ids.copy() - # Make the state events float off on their own so we don't have a - # bunch of `@mxid joined the room` noise between each batch + # Make the state events float off on their own by specifying no + # prev_events for the first one in the chain so we don't have a bunch of + # `@mxid joined the room` noise between each batch. prev_event_ids_for_state_chain: List[str] = [] for index, state_event in enumerate(state_events_at_start): @@ -289,8 +290,9 @@ async def persist_historical_events( """ assert app_service_requester.app_service - # Make the historical event chain float off on its own which causes the - # HS to ask for the state at the start of the batch later. + # Make the historical event chain float off on its own by specifying no + # prev_events for the first event in the chain which causes the HS to + # ask for the state at the start of the batch later. 
prev_event_ids: List[str] = [] event_ids = [] From df2a1523fcecc0f70aeed5e9c4c6bbb51cc06239 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 16 Dec 2021 17:52:13 -0600 Subject: [PATCH 41/47] Fix NPE when trying to grab event from wrong roomId (fix sytest) See https://github.com/matrix-org/synapse/pull/11114#discussion_r741687508 --- .../databases/main/event_federation.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b9d4ef6c203b..ef05c67fa02b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1165,24 +1165,25 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): allow_none=True, ) - logger.debug( - "_get_backfill_events(room_id=%s): seed_event_id=%s depth=%s stream_ordering=%s type=%s", - room_id, - seed_event_id, - event_lookup_result["depth"], - event_lookup_result["stream_ordering"], - event_lookup_result["type"], - ) + if event_lookup_result is not None: + logger.debug( + "_get_backfill_events(room_id=%s): seed_event_id=%s depth=%s stream_ordering=%s type=%s", + room_id, + seed_event_id, + event_lookup_result["depth"], + event_lookup_result["stream_ordering"], + event_lookup_result["type"], + ) - if event_lookup_result["depth"]: - queue.put( - ( - -event_lookup_result["depth"], - -event_lookup_result["stream_ordering"], - seed_event_id, - event_lookup_result["type"], + if event_lookup_result["depth"]: + queue.put( + ( + -event_lookup_result["depth"], + -event_lookup_result["stream_ordering"], + seed_event_id, + event_lookup_result["type"], + ) ) - ) while not queue.empty() and len(event_id_results) < limit: try: From a38befa07b24076cbe14ee40a2efb38219f94cc9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 4 Feb 2022 16:05:57 -0600 Subject: [PATCH 42/47] Some review optimizations See: - 
https://github.com/matrix-org/synapse/pull/11114#discussion_r799350801 - https://github.com/matrix-org/synapse/pull/11114#discussion_r799354680 --- synapse/storage/databases/main/event_federation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ec2629e6574a..80c22e4a816a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -13,6 +13,7 @@ # limitations under the License. import itertools import logging +import attr from queue import Empty, PriorityQueue from typing import ( TYPE_CHECKING, @@ -71,7 +72,8 @@ # All the info we need while iterating the DAG while backfilling -class BackfillQueueNavigationItem(NamedTuple): +@attr.s(frozen=True, slots=True, auto_attribs=True) +class BackfillQueueNavigationItem: depth: int stream_ordering: int event_id: str @@ -1051,7 +1053,6 @@ def _get_connected_batch_event_backfill_results_txn( batch_connection_query, (insertion_event_id, limit), ) - batch_start_event_id_results = txn.fetchall() return [ BackfillQueueNavigationItem( depth=row[0], @@ -1059,7 +1060,7 @@ def _get_connected_batch_event_backfill_results_txn( event_id=row[2], type=row[3], ) - for row in batch_start_event_id_results + for row in txn ] def _get_connected_prev_event_backfill_results_txn( From 033360aa29e53cc1a26d9bbda6157fb217774ceb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 4 Feb 2022 16:16:13 -0600 Subject: [PATCH 43/47] Fix lints --- synapse/storage/databases/main/event_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 80c22e4a816a..2914e9ec8cde 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -13,7 +13,6 @@ # limitations under the 
License. import itertools import logging -import attr from queue import Empty, PriorityQueue from typing import ( TYPE_CHECKING, @@ -27,6 +26,7 @@ Tuple, ) +import attr from prometheus_client import Counter, Gauge from synapse.api.constants import MAX_DEPTH, EventTypes From 3f22e42b8aaed6ae6307b2cf83435accf37fe70f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 4 Feb 2022 16:20:29 -0600 Subject: [PATCH 44/47] Fix unused lint --- synapse/storage/databases/main/event_federation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 2914e9ec8cde..9cd361b1e958 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -20,7 +20,6 @@ Dict, Iterable, List, - NamedTuple, Optional, Set, Tuple, From e5670ff818388446d7d122269c86f314542189f1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 4 Feb 2022 16:32:56 -0600 Subject: [PATCH 45/47] Fix lints --- synapse/storage/databases/main/event_federation.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 9cd361b1e958..79e74a41a9b5 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,16 +14,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import ( - TYPE_CHECKING, - Collection, - Dict, - Iterable, - List, - Optional, - Set, - Tuple, -) +from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple import attr from prometheus_client import Counter, Gauge From 023bd3eb8c4f7c903089b129264fb5beb009ca79 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Feb 2022 14:33:43 -0600 Subject: [PATCH 46/47] Don't run MSC2716 complement tests for everyone --- 
scripts-dev/complement.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index b310a4621931..e08ffedaf33a 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -71,4 +71,4 @@ fi # Run the tests! echo "Images built; running complement" -go test -v -tags synapse_blacklist,msc2403,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... +go test -v -tags synapse_blacklist,msc2403 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests/... From b3fcffbd1646b0b298d3c41e2f34d665e1d0422b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Feb 2022 14:46:52 -0600 Subject: [PATCH 47/47] Use same txn iteration optimization See https://github.com/matrix-org/synapse/pull/11114#discussion_r799354680 --- synapse/storage/databases/main/event_federation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 79e74a41a9b5..22f64741277a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1086,7 +1086,6 @@ def _get_connected_prev_event_backfill_results_txn( connected_prev_event_query, (event_id, False, limit), ) - prev_event_id_results = txn.fetchall() return [ BackfillQueueNavigationItem( depth=row[0], @@ -1094,7 +1093,7 @@ def _get_connected_prev_event_backfill_results_txn( event_id=row[2], type=row[3], ) - for row in prev_event_id_results + for row in txn ] async def get_backfill_events(