From 49e888d10c949e58be92cacf3f023a21b7f3f244 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 1 Dec 2020 14:10:51 +0000 Subject: [PATCH 01/44] Change alg --- synapse/storage/database.py | 8 ++ .../databases/main/event_federation.py | 120 +++++++++++++++++- .../schema/delta/58/24_event_auth_chains.sql | 36 ++++++ 3 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql diff --git a/synapse/storage/database.py b/synapse/storage/database.py index d1b5760c2c09..e4a2b2fabb74 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -267,6 +267,14 @@ def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None: for val in args: self.execute(sql, val) + def execute_values(self, sql: str, *args: Any, **kwargs) -> None: + assert isinstance(self.database_engine, PostgresEngine) + from psycopg2.extras import execute_values # type: ignore + + return self._do_execute( + lambda *x: execute_values(self.txn, *x, **kwargs), sql, *args + ) + def execute(self, sql: str, *args: Any) -> None: self._do_execute(self.txn.execute, sql, *args) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ebffd89251a9..67822f1544a2 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -15,7 +15,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Dict, Iterable, List, Set, Tuple +from typing import Dict, Iterable, List, Optional, Set, Tuple from synapse.api.errors import StoreError from synapse.events import EventBase @@ -153,10 +153,126 @@ async def get_auth_chain_difference( return await self.db_pool.runInteraction( "get_auth_chain_difference", - self._get_auth_chain_difference_txn, + self._get_auth_chain_difference_using_chains_txn, state_sets, ) + def _get_auth_chain_difference_using_chains_txn( + self, txn, state_sets: List[Set[str]] + ) -> Set[str]: + """Uses chains dlfks;fk + """ + + initial_events = set(state_sets[0]).union(*state_sets[1:]) + + chain_info = {} + chain_to_event = {} + seen_chains = set() + + # FIXME: Need to handle chains that point to chains not in state sets + + sql = """ + SELECT event_id, chain_id, sequence_number + FROM event_auth_chains + WHERE %s + """ + for batch in batch_iter(initial_events, 1000): + clause, args = make_in_list_sql_clause( + txn.database_engine, "event_id", batch + ) + txn.execute(sql % (clause,), args) + + for event_id, chain_id, sequence_number in txn: + chain_info[event_id] = (chain_id, sequence_number) + seen_chains.add(chain_id) + chain_to_event.setdefault(chain_id, {})[sequence_number] = event_id + + set_to_chain = {} + for set_id, state_set in enumerate(state_sets): + chains = set_to_chain.setdefault(set_id, {}) + for event_id in state_set: + chain_id, seq_no = chain_info[event_id] + + curr = chains.setdefault(chain_id, seq_no) + if curr < seq_no: + chains[chain_id] = seq_no + + sql = """ + SELECT + origin_chain_id, origin_sequence_number, + target_chain_id, target_sequence_number + FROM event_auth_chain_links + WHERE %s + """ + + # chain_links = {} + for batch in batch_iter(seen_chains, 1000): + clause, args = make_in_list_sql_clause( + txn.database_engine, "origin_chain_id", batch + ) + txn.execute(sql % (clause,), args) + + for ( + origin_chain_id, + origin_sequence_number, + target_chain_id, + target_sequence_number, + ) in txn: + # chain_links.setdefault( + # 
(origin_chain_id, origin_sequence_number), []
+                # ).append((target_chain_id, target_sequence_number))
+
+                # TODO: Handle this case; it's valid for it to happen.
+                for chains in set_to_chain.values():
+                    if origin_sequence_number <= chains.get(origin_chain_id, 0):
+                        curr = chains.setdefault(
+                            target_chain_id, target_sequence_number
+                        )
+                        if curr < target_sequence_number:
+                            chains[target_chain_id] = target_sequence_number
+
+        result = set()
+
+        chain_to_gap = {}
+        for chain_id in seen_chains:
+            min_seq_no = min(
+                chains.get(chain_id, 0) for chains in set_to_chain.values()
+            )
+
+            max_seq_no = 0
+            for chains in set_to_chain.values():
+                s = chains.get(chain_id)
+                if s:
+                    max_seq_no = max(max_seq_no, s)
+
+            if min_seq_no < max_seq_no:
+                for seq_no in range(min_seq_no + 1, max_seq_no + 1):
+                    event_id = chain_to_event[chain_id].get(seq_no)
+                    if event_id:
+                        result.add(event_id)
+                    else:
+                        chain_to_gap[chain_id] = (min_seq_no, max_seq_no)
+                        break
+
+        sql = """
+            SELECT event_id
+            FROM event_auth_chains AS c, (VALUES %s) AS l(chain_id, min_seq, max_seq)
+            WHERE
+                c.chain_id = l.chain_id
+                AND min_seq < sequence_number AND sequence_number <= max_seq
+        """
+
+        args = [
+            (chain_id, min_no, max_no)
+            for chain_id, (min_no, max_no) in chain_to_gap.items()
+        ]
+
+        rows = txn.execute_values(sql, args, fetch=True)
+        result.update(r for r, in rows)
+
+        return result
+
     def _get_auth_chain_difference_txn(
         self, txn, state_sets: List[Set[str]]
     ) -> Set[str]:
diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql
new file mode 100644
index 000000000000..582fdb31ae11
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql
@@ -0,0 +1,36 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +CREATE SEQUENCE IF NOT EXISTS event_auth_chain_id; + +CREATE TABLE event_auth_chains ( + event_id TEXT PRIMARY KEY, + chain_id BIGINT NOT NULL, + sequence_number BIGINT NOT NULL +); + +CREATE UNIQUE INDEX ON event_auth_chains (chain_id, sequence_number); + + +CREATE TABLE event_auth_chain_links ( + origin_chain_id BIGINT NOT NULL, + origin_sequence_number BIGINT NOT NULL, + + target_chain_id BIGINT NOT NULL, + target_sequence_number BIGINT NOT NULL +); + + +CREATE INDEX ON event_auth_chain_links (origin_chain_id, target_chain_id); From 8c760ffa4850cc2f288cf5e565c57e34ef3a0fb9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 2 Dec 2020 13:37:10 +0000 Subject: [PATCH 02/44] Calculate chain ID/seq no on event insertion --- synapse/storage/databases/main/events.py | 334 +++++++++++++++++++++-- synapse/util/iterutils.py | 33 ++- 2 files changed, 348 insertions(+), 19 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 90fb1a1f004e..7f636fe0dfbb 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -33,9 +33,10 @@ from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.search import SearchEntry from synapse.storage.util.id_generators import MultiWriterIdGenerator +from synapse.storage.util.sequence import build_sequence_generator from synapse.types import StateMap, get_domain_from_id from synapse.util import json_encoder -from synapse.util.iterutils import batch_iter +from synapse.util.iterutils import batch_iter, sorted_topologically if TYPE_CHECKING: from synapse.server import HomeServer @@ -89,6 +90,14 @@ def __init__( self._clock = hs.get_clock() self._instance_name = hs.get_instance_name() + def get_chain_id_txn(txn): + txn.execute("SELECT COALESCE(max(chain_id), 0) FROM event_auth_chains") + return txn.fetchone()[0] + + self._event_chain_id_gen = build_sequence_generator( + db.engine, get_chain_id_txn, "event_auth_chain_id" + ) + self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages self.is_mine_id = hs.is_mine_id @@ -366,6 +375,31 @@ def _persist_events_txn( # Insert into event_to_state_groups. self._store_event_state_mappings_txn(txn, events_and_contexts) + self._persist_event_auth_chain_txn(txn, [e for e, _ in events_and_contexts]) + + # _store_rejected_events_txn filters out any events which were + # rejected, and returns the filtered list. + events_and_contexts = self._store_rejected_events_txn( + txn, events_and_contexts=events_and_contexts + ) + + # From this point onwards the events are only ones that weren't + # rejected. + + self._update_metadata_tables_txn( + txn, + events_and_contexts=events_and_contexts, + all_events_and_contexts=all_events_and_contexts, + backfilled=backfilled, + ) + + # We call this last as it assumes we've inserted the events into + # room_memberships, where applicable. + self._update_current_state_txn(txn, state_delta_for_room, min_stream_order) + + def _persist_event_auth_chain_txn( + self, txn: LoggingTransaction, events: List[EventBase], + ): # We want to store event_auth mappings for rejected events, as they're # used in state res v2. 
         # This is only necessary if the rejected event appears in an accepted
@@ -381,31 +415,295 @@ def _persist_events_txn(
                 "room_id": event.room_id,
                 "auth_id": auth_id,
             }
-            for event, _ in events_and_contexts
+            for event in events
             for auth_id in event.auth_event_ids()
             if event.is_state()
         ],
     )
 
-        # _store_rejected_events_txn filters out any events which were
-        # rejected, and returns the filtered list.
-        events_and_contexts = self._store_rejected_events_txn(
-            txn, events_and_contexts=events_and_contexts
-        )
+        # We now calculate chain ID/sequence numbers for any state events we're
+        # persisting. We ignore out of band memberships as we're not in the room
+        # and won't have their auth chain (we'll fix it up later if we join the
+        # room).
+        event_ids = {event.event_id for event in events}
+        state_events = [
+            event
+            for event in events
+            if event.is_state()
+            and not event.internal_metadata.is_out_of_band_membership()
+        ]
+        if state_events:
+            chain_map = {}
+            new_chains = {}
 
-        # From this point onwards the events are only ones that weren't
-        # rejected.
+            event_to_types = {e.event_id: (e.type, e.state_key) for e in state_events}
+            events_to_calc_chain_id_for = set(event_to_types)
+            event_to_auth_chain = {e.event_id: e.auth_event_ids() for e in state_events}
 
-        self._update_metadata_tables_txn(
-            txn,
-            events_and_contexts=events_and_contexts,
-            all_events_and_contexts=all_events_and_contexts,
-            backfilled=backfilled,
-        )
+            # First we get the chain ID and sequence numbers for the events'
+            # auth events (that aren't also currently being persisted).
+            #
+            # Note that there is an edge case here where we might not have
+            # calculated chains and sequence numbers for events that were "out
+            # of band". We handle this case by fetching the necessary info and
+            # adding it to the set of events to calculate chain IDs for.
+
+            missing_auth_chains = {
+                a_id
+                for e in state_events
+                for a_id in e.auth_event_ids()
+                if a_id not in event_ids
+            }
 
-        # We call this last as it assumes we've inserted the events into
-        # room_memberships, where applicable.
-        self._update_current_state_txn(txn, state_delta_for_room, min_stream_order)
+            # We loop here in case we find an out of band membership and need to
+            # fetch its auth event info.
+            while missing_auth_chains:
+                sql = """
+                    SELECT event_id, events.type, state_key, chain_id, sequence_number
+                    FROM events
+                    INNER JOIN state_events USING (event_id)
+                    LEFT JOIN event_auth_chains USING (event_id)
+                    WHERE
+                """
+                clause, args = make_in_list_sql_clause(
+                    txn.database_engine, "event_id", missing_auth_chains,
+                )
+                txn.execute(sql + clause, args)
+
+                missing_auth_chains.clear()
+
+                for auth_id, etype, state_key, chain_id, sequence_number in txn:
+                    event_to_types[auth_id] = (etype, state_key)
+
+                    if chain_id is None:
+                        # No chain ID, so the event was persisted out of band.
+                        # We add it to the list of events to calculate auth
+                        # chains for.
+
+                        events_to_calc_chain_id_for.add(auth_id)
+
+                        event_to_auth_chain[
+                            auth_id
+                        ] = self.db_pool.simple_select_onecol_txn(
+                            txn,
+                            "event_auth",
+                            keyvalues={"event_id": auth_id},
+                            retcol="auth_id",
+                        )
+
+                        missing_auth_chains.update(
+                            e
+                            for e in event_to_auth_chain[auth_id]
+                            if e not in event_to_types
+                        )
+                    else:
+                        chain_map[auth_id] = (chain_id, sequence_number)
+
+            # We now calculate the chain IDs/sequence numbers for the events. We
+            # do this by looking at the chain ID and sequence number of any auth
+            # event with the same type/state_key and incrementing the sequence
+            # number by one. If there was no match or the chain ID/sequence
+            # number is already taken we generate a new chain.
+            #
+            # We need to do this in a topologically sorted order as we want to
+            # generate chain IDs/sequence numbers of an event's auth events
+            # before the event itself.
+            for event_id in sorted_topologically(
+                events_to_calc_chain_id_for, event_to_auth_chain
+            ):
+                existing_chain_id = None
+                for auth_id in event_to_auth_chain[event_id]:
+                    if event_to_types.get(event_id) == event_to_types.get(auth_id):
+                        existing_chain_id = chain_map[auth_id]
+
+                new_chain_id = None
+                if existing_chain_id:
+                    # We found a chain ID/sequence number candidate, check it's
+                    # not already taken.
+                    row = self.db_pool.simple_select_one_onecol_txn(
+                        txn,
+                        table="event_auth_chains",
+                        keyvalues={
+                            "chain_id": existing_chain_id[0],
+                            "sequence_number": existing_chain_id[1] + 1,
+                        },
+                        retcol="event_id",
+                        allow_none=True,
+                    )
+                    if not row:
+                        new_chain_id = (existing_chain_id[0], existing_chain_id[1] + 1)
+
+                if not new_chain_id:
+                    new_chain_id = (self._event_chain_id_gen.get_next_id_txn(txn), 1)
+
+                chain_map[event_id] = new_chain_id
+                new_chains[event_id] = new_chain_id
+
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="event_auth_chains",
+                values=[
+                    {"event_id": event_id, "chain_id": c_id, "sequence_number": seq}
+                    for event_id, (c_id, seq) in new_chains.items()
+                ],
+            )
+
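# The following is an editor's illustration, not part of the patch: a
# minimal, DB-free sketch of the assignment rule described above. All names
# (assign_chains, type_key, allocated) are hypothetical. It reuses an auth
# event's chain when the type/state_key matches and the next sequence number
# is free, otherwise it opens a new chain.

from itertools import count
from typing import Dict, List, Tuple

def assign_chains(
    events: List[Tuple[str, str, List[str]]],  # (event_id, type_key, auth_ids)
) -> Dict[str, Tuple[int, int]]:
    new_chain_ids = count(1)
    chain_map: Dict[str, Tuple[int, int]] = {}
    types: Dict[str, str] = {}
    allocated = set()  # (chain_id, sequence_number) pairs already taken

    for event_id, type_key, auth_ids in events:  # assumed topologically sorted
        types[event_id] = type_key
        candidate = None
        for auth_id in auth_ids:
            if types.get(auth_id) == type_key:
                chain, seq = chain_map[auth_id]
                if (chain, seq + 1) not in allocated:
                    candidate = (chain, seq + 1)
        if candidate is None:
            # No usable auth event in the same chain: start a new chain.
            candidate = (next(new_chain_ids), 1)
        chain_map[event_id] = candidate
        allocated.add(candidate)
    return chain_map

# Two joins from the same user share a chain; the create event gets its own.
print(
    assign_chains(
        [
            ("create", "m.room.create", []),
            ("join1", "member:bob", ["create"]),
            ("join2", "member:bob", ["join1"]),
        ]
    )
)
# -> {'create': (1, 1), 'join1': (2, 1), 'join2': (2, 2)}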
+            # Now we need to calculate any new links between chains caused by
+            # the new events.
+            #
+            # Links are pairs of chain ID/sequence numbers such that for any
+            # event A (CA, SA) and any event B (CB, SB), B is in A's auth chain
+            # if and only if there is at least one link (CA, S1) -> (CB, S2)
+            # where SA >= S1 and S2 >= SB.
+            #
+            # We try and avoid adding redundant links to the table, e.g. if we
+            # have two links between two chains which both start/end at the
+            # same sequence number (or cross) then one can be safely dropped.
+            #
+            # To calculate new links we look at every new event and:
+            #   1. Fetch the chain ID/sequence numbers of its auth events,
+            #      discarding any that are reachable by other auth events, or
+            #      that have the same chain ID as the event.
+            #   2. For each retained auth event we:
+            #       1. propose adding a link from the event's chain ID/sequence
+            #          number to the auth event's; and
+            #       2. propose adding a link from the event to every chain
+            #          reachable by the auth event.
+            #   3. Filter redundant links from the list of proposed links.
+            #   4. Persist the new links.
+            #
+
+            # Step 1, fetch all existing links
+            chain_links = {}  # type: Dict[Tuple[int, int], Set[Tuple[int, int]]]
+            rows = self.db_pool.simple_select_many_txn(
+                txn,
+                table="event_auth_chain_links",
+                column="origin_chain_id",
+                iterable={chain_id for chain_id, _ in chain_map.values()},
+                keyvalues={},
+                retcols=(
+                    "origin_chain_id",
+                    "origin_sequence_number",
+                    "target_chain_id",
+                    "target_sequence_number",
+                ),
+            )
+            for row in rows:
+                chain_links.setdefault(
+                    (row["origin_chain_id"], row["target_chain_id"]), set()
+                ).add((row["origin_sequence_number"], row["target_sequence_number"]),)
+
+            to_add = {}  # type: Dict[Tuple[int, int], Set[Tuple[int, int]]]
+            for event_id in events_to_calc_chain_id_for:
+                chain_id, sequence_number = chain_map[event_id]
+
+                # Filter out auth events that are reachable by other auth
+                # events. We do this by looking at every permutation of pairs of
+                # auth events (A, B) to check if B is reachable from A.
+                reduction = set(event_to_auth_chain[event_id])
+                for start_auth_id, end_auth_id in itertools.permutations(
+                    [
+                        auth_id
+                        for auth_id in event_to_auth_chain[event_id]
+                        if auth_id in chain_map
+                    ],
+                    r=2,
+                ):
+                    source_chain_id, source_seq_no = chain_map[start_auth_id]
+                    target_chain_id, target_seq_no = chain_map[end_auth_id]
+
+                    if source_chain_id == chain_id:
+                        # Discard auth events in the same chain.
+                        reduction.discard(start_auth_id)
+
+                    links = chain_links.get((source_chain_id, target_chain_id), set())
+                    for link_start_seq, link_end_seq in links:
+                        if (
+                            link_start_seq <= source_seq_no
+                            and target_seq_no <= link_end_seq
+                        ):
+                            reduction.discard(end_auth_id)
+                            break
+
+                # Step 2, figure out what the new links are from the reduced
+                # list of auth events.
+                for auth_id in reduction:
+                    auth_chain_id, auth_sequence_number = chain_map[auth_id]
+
+                    # Step 2a, add link from event -> auth event
+                    links = chain_links.setdefault((chain_id, auth_chain_id), set())
+                    links.add((sequence_number, auth_sequence_number))
+
+                    # Step 2b, look up all links from auth events and add them
+                    # as links from the event.
+                    for (source_id, target_id), auth_links in chain_links.items():
+                        if source_id != auth_chain_id or target_id == chain_id:
+                            continue
+
+                        for (source_seq, target_seq) in auth_links:
+                            if source_seq <= auth_sequence_number:
+                                to_add.setdefault((chain_id, target_id), set()).add(
+                                    (sequence_number, target_seq)
+                                )
+
+            for key, values in to_add.items():
+                chain_links.setdefault(key, set()).update(values)
+
+            # Step 3, filter out redundant links.
+            #
+            # We do this by comparing a proposed link with all existing links
+            # and other proposed links.
+            #
+            # Note: new links won't cause existing links in the DB to become
+            # redundant, as new links must start at newly persisted events
+            # (which won't be reachable by any existing events).
+            filtered_links = {}  # type: Dict[Tuple[int, int], Set[Tuple[int, int]]]
+            for ((source_chain, target_chain), links) in to_add.items():
+                filtered_chain_links = filtered_links.setdefault(
+                    (source_chain, target_chain), set()
+                )
+                for link_to_add in links:
+                    for existing_link in chain_links[(source_chain, target_chain)]:
+                        # If a link "crosses" another link then it's redundant.
+                        # For example in the following link 1 (L1) is redundant,
+                        # as any event reachable via L1 is *also* reachable via
+                        # L2.
+                        #
+                        #     Chain A     Chain B
+                        #        |          |
+                        #   L1   |------    |
+                        #        |     |    |
+                        #   L2   |---- | -->|
+                        #        |     |    |
+                        #        |     |--->|
+                        #        |          |
+                        #        |          |
+                        #
+                        # So we only need to keep links which *do not* cross,
+                        # i.e. links that both start and end above or below an
+                        # existing link.
+                        if (
+                            link_to_add[0] < existing_link[0]
+                            and link_to_add[1] < existing_link[1]
+                        ) or (
+                            link_to_add[0] > existing_link[0]
+                            and link_to_add[1] > existing_link[1]
+                        ):
+                            filtered_chain_links.add(link_to_add)
+
+            self.db_pool.simple_insert_many_txn(
+                txn,
+                table="event_auth_chain_links",
+                values=[
+                    {
+                        "origin_chain_id": source_id,
+                        "origin_sequence_number": source_seq,
+                        "target_chain_id": target_id,
+                        "target_sequence_number": target_seq,
+                    }
+                    for (source_id, target_id), sequences in filtered_links.items()
+                    for source_seq, target_seq in sequences
+                ],
+            )
 
     def _persist_transaction_ids_txn(
         self,
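The "crossing" test in Step 3 above fits in a few lines. The sketch below is
an editor's illustration, not the patch's code: `keep_link` and its arguments
are hypothetical names, and it implements the rule from the comment that a
proposed link survives only if it stays strictly above or strictly below every
existing link between the same pair of chains.

```python
from typing import Set, Tuple

def keep_link(proposed: Tuple[int, int], existing: Set[Tuple[int, int]]) -> bool:
    # Keep a proposed (origin_seq, target_seq) link only if it does not
    # cross (or touch) any existing link between the same pair of chains.
    return all(
        (proposed[0] < s and proposed[1] < t) or (proposed[0] > s and proposed[1] > t)
        for s, t in existing
    )

print(keep_link((3, 1), {(2, 2)}))  # False: (3, 1) crosses (2, 2)
print(keep_link((3, 3), {(2, 2)}))  # True: strictly above it
```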
diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py
index 06faeebe7f5a..e2428016be61 100644
--- a/synapse/util/iterutils.py
+++ b/synapse/util/iterutils.py
@@ -14,7 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from itertools import islice
-from typing import Iterable, Iterator, Sequence, Tuple, TypeVar
+from typing import Dict, Generator, Iterable, Iterator, Sequence, Set, Tuple, TypeVar
+
+from synapse.types import Collection
 
 T = TypeVar("T")
 
@@ -46,3 +48,32 @@ def chunk_seq(iseq: ISeq, maxlen: int) -> Iterable[ISeq]:
         If the input is empty, no chunks are returned.
     """
     return (iseq[i : i + maxlen] for i in range(0, len(iseq), maxlen))
+
+
+def sorted_topologically(
+    nodes: Iterable[T], graph: Dict[T, Collection[T]],
+) -> Generator[T, None, None]:
+    degree_map = {node: 0 for node in nodes}
+    reverse_graph = {}  # type: Dict[T, Set[T]]
+
+    for node, edges in graph.items():
+        if node not in degree_map:
+            continue
+
+        for edge in edges:
+            if edge in degree_map:
+                degree_map[node] += 1
+
+            reverse_graph.setdefault(edge, set()).add(node)
+        reverse_graph.setdefault(node, set())
+
+    zero_degree = [node for node, degree in degree_map.items() if degree == 0]
+    while zero_degree:
+        node = zero_degree.pop()
+        yield node
+
+        for edge in reverse_graph[node]:
+            if edge in degree_map:
+                degree_map[edge] -= 1
+                if degree_map[edge] == 0:
+                    zero_degree.append(edge)
From 85348e15edc1bb7deb3adba5bfe02af92cdba7eb Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 2 Dec 2020 17:36:34 +0000
Subject: [PATCH 03/44] Add some docs about the chain cover

---
 docs/auth_chain_diff.dot                      | 32 +++++++
 docs/auth_chain_diff.dot.png                  | Bin 0 -> 42427 bytes
 docs/auth_chain_difference_algorithm.md       | 78 ++++++++++++++++++
 .../databases/main/event_federation.py        |  4 +-
 synapse/storage/databases/main/events.py      |  2 +
 .../schema/delta/58/24_event_auth_chains.sql  |  8 ++
 6 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 docs/auth_chain_diff.dot
 create mode 100644 docs/auth_chain_diff.dot.png
 create mode 100644 docs/auth_chain_difference_algorithm.md

diff --git a/docs/auth_chain_diff.dot b/docs/auth_chain_diff.dot
new file mode 100644
index 000000000000..978d579ada1b
--- /dev/null
+++ b/docs/auth_chain_diff.dot
@@ -0,0 +1,32 @@
+digraph auth {
+    nodesep=0.5;
+    rankdir="RL";
+
+    C [label="Create (1,1)"];
+
+    BJ [label="Bob's Join (2,1)", color=red];
+    BJ2 [label="Bob's Join (2,2)", color=red];
+    BJ2 -> BJ [color=red, dir=none];
+
+    subgraph cluster_foo {
+        A1 [label="Alice's invite (4,1)", color=blue];
+        A2 [label="Alice's Join (4,2)", color=blue];
+        A3 [label="Alice's Join (4,3)", color=blue];
+        A3 -> A2 -> A1 [color=blue, dir=none];
+        color=none;
+    }
+
+    PL1 [label="Power Level (3,1)", color=darkgreen];
+    PL2 [label="Power Level (3,2)", color=darkgreen];
+    PL2 -> PL1 [color=darkgreen, dir=none];
+
+    {rank = same; C; BJ; PL1; A1;}
+
+    A1 -> C [color=grey];
+    A1 -> BJ [color=grey];
+    PL1 -> C [color=grey];
+    BJ2 -> PL1 [penwidth=2];
+
+    A3 -> PL2 [penwidth=2];
+    A1 -> PL1 -> BJ -> C [penwidth=2];
+}
diff --git a/docs/auth_chain_diff.dot.png b/docs/auth_chain_diff.dot.png
new file mode 100644
index 0000000000000000000000000000000000000000..771c07308f08e21900cb8e1190471f803ce1ab04
GIT binary patch
literal 42427
[42,427 bytes of binary PNG data omitted: the rendered auth_chain_diff.dot graph]
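Patch 02's `sorted_topologically` helper (added to iterutils above) is what
guarantees that an event's auth events are assigned chain IDs before the event
itself. A quick illustration of its contract, assuming a Synapse checkout is
importable:

```python
from synapse.util.iterutils import sorted_topologically

# Event 3 is authed by 1 and 2; event 2 is authed by 1. Parents must be
# yielded before children, so 1 comes first and 3 comes last.
graph = {3: [1, 2], 2: [1], 1: []}
print(list(sorted_topologically([1, 2, 3], graph)))  # [1, 2, 3]
```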
zfjS6|@4(gl{D3L)?2v;QGrH?ztTLmT&3i~$BWAftNljor>wJujTDS^dpN8J0U!1um z-0)yw$-5W&MB@|}RoS!46k3j0>C}evMMX(9y>aH}5Xt>oUs2}6G~3ICg-0Y)xccXfq9HEK&UJ@1M(s;=uL6eJ?2{8az6rEu{Uj)rh z&h_27ldXR~SN09fv#jmtFxSz!)z-Fe@Yjo%A?#qWt)!gZz}D}<5&LVOWf3CL z&02bb$GJ#-i|=?aLIPL-{fG^LudlMQatTcZ%!kgE=03=wmrk5DGy9>Y7wjmw)_|#( zvPxL&Py4_Ep9UuS*Z!=huWjC(D;M!ZeA9qL3&9A4Yzn&Qo89D{wo}QS~ddt>d!x$+S)4i4Rs=G zhH;MO{%){<-mpa>in07PHI=};r>lK_rwlJo?eqCJYmi)mQvx;8*`T4m9@bDbd)zLM znmuZ(tM$yxO0Qje$WZGLK37Xi-COJ!x2b!aHAHZj7-vIW6&WW_Vzf=aYbdugvsf+S zMhUMU62(eQys4mucGyllnaSlk?2>!_ZIJw;v#g`5s|yb}Y?M$-O>ND$xgmIW0AW9I z{J5mpAAhurm6~4%t>W$mWRe$}!j`83ARLhVXUfPpu9lP18tuLj0or`FbR#K})P7${(vF}5 z`fz_{6Y$9^8WSJ}dK_iEeSPu5LZS6(e?P`W;oh4$w);y1X$>|!FNnnzoUeTD zUyw)~PMh`PV&moYpPZaxdS|#$6&4mkhMFRFdV^vo18j2B?{GN;@iTY|UMuzn2Er1l zuB?PW6ZSItGxaMp&q#PfXkBg$H2w&2rj(TIoSfH`M)9$+-EJyeTiM}^C)=&WD0)Ga z#e<+T=s!S;M3wdW<3Dw0uMP+pAv2HuP~+ zfg$Iiv^{&Aj4@oOy83DK&w4%P2=|rMoUk6g>0A6?1ZSTSRMmiqf?Cq31tgkVf3-;9ssDyEeV z5ATAPJ6MIFoK%x#+Ul)vpGXbkt;A{_d1>Ti4U+AbRJXJ!b!5KeD8wYjwa&a-p5{wm0&CR{PEPp%Zv1H(Oja5_4y0VbZ zBW?t5b0y^(xQg^tt{j>?NYtJyetqrCrAwC>LcJae|2#574dM$$&3G#`(UdwLoL#K6 z1I`MjTiC!!Yf-MxifU?Z{)GWad;;%(7h^N_{K!{iPLCf9zORSlXOq*!O)g))oMg7) zQ~jhmy&pualmy`KF-y8A9zR4RJ{A>4#vfVuoY231NXT8;Q!UaKJb&>b`=0N3D=AZ2 z3x--<6ijp}tzR&b8mz%V2D?}vh^G@>lN9}!;AepmqlA0@ozwqOFNPUFcDtoYEeR{ zOo!dItgI@DB=})1Xddp8hpQn9OI@ky36s0ZFUeLz)Pp^eUo_;_pSy8hjfCvEjA51+mWfgHmLq!Fe zH63wDOx8VLhJU6LdG7UH^C92u+xM9Mh=}cAH>@Ci#S_R|W4^>rLwx+*8xhl2SzEIp zz(UV=i942LI!!gvadBTt3uLlEi^065nP3a{hVgvd)h5Y~G06@8i(;`q=AWR4 zt{3R>MHj^v4*3}-uOY{CPKcZv|58dWKJ00(p<&d_S+iihydLhUG}TKGSG+UBflf5>$Nqx9vwUvc)OblGzzDkT_88`6r z1QkwCGiQB8K|hqNcIfRZS2;O3Pk<*7T#t!1N(Fq9^flu#4)83W)FHU4@xw&Vmmb1HiX+WxBS7Mrz$wZ8r4VJ04feS^sk8tBo$;|pEkRp z!|*8o`}I9PpiWGUxv}FYjYg(0AVjQnkv;#_*K_j4phzb|J*mxpRw= zJH^tLjg`(Td%z@E{Zse++gVMti21~7fQo8k*vN-5RVSjORa8|SgPi=e5aUcq8R+X% z{KqKO!y~l&`V@&Zg~{Pl4abMW9uh=BZ+Ap~O@S&QLQV2J7Z+CStQraMl1aZ4(4aSM`knm=bhKmR z;wS~F7y$^Ldsfb)I;^55vSxZ?(t4|=hLWBtkgW(b0n_Sy&X}c59+2!ZvvMyoLvykD#4`k$) znFbNIdhBMQnk{@98kC}~aQ{UMx2&R~0^eXY2X9WdrR2d}Q8gsFS}H0Q1rskN}c%26q%3ae8VadkeMCJWs@WvhaNUG|yj5>HXPST;{wSa_{41sKW5 z+}wQn^yyeR_w`Y@IX@Dj%5ZzLMRC2p zLq)VfXHE<{K~!}=i4L?6C%~^sO#CSFB!|aYLzlI-wgwFtYKx*PO5C=sV9Q!sbd$Kt zb}~7u+q_)r^vukNaKbVtFc{6W~8O3qfjy@W;4lRbfXeqY*E?3n(^RVW*i<$tbQ(EHVI$3$V35OdY)T> z+oy>(oo}qz;{e^M{~cHnWmVNppVYkHV?kd{K|uixgTUw5OYr C2 -> C1` then the link `C3 -> C1` would not be +stored. Synapse uses the former variant so that it doesn't need to recurse to +test reachability between chains. + +### Example + +An example auth graph would look like the following, where chains have been +formed based on type/state_key and are denoted by colour and are labelled with +`(chain ID, sequence number)`. Links are denoted by the arrows (links in grey +are those that would be remove in the second variant described above). + +![Example](auth_chain_diff.dot.png) + +Note that we don't add links between every event and its auth events, as that is +redundant (under both variants), e.g. all events point to the create event, but +each chain only needs the one link from it's base to the create event. + +## Using the Index + +This index can be used to calculate the auth chain difference of the state sets +by looking at the chain ID and sequence numbers reachable from each state set: + +1. For every state set lookup the chain ID/sequence numbers of each state event +2. Use the index to find all chains and the maximum sequence number reachable + from each state set. +3. 
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 67822f1544a2..26b4bc063fba 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -160,7 +160,9 @@ async def get_auth_chain_difference(
     def _get_auth_chain_difference_using_chains_txn(
         self, txn, state_sets: List[Set[str]]
     ) -> Set[str]:
-        """Uses chains dlfks;fk
+        """Calculates the auth chain difference using the chain index.
+
+        See docs/auth_chain_difference_algorithm.md for details
         """
 
         initial_events = set(state_sets[0]).union(*state_sets[1:])
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 7f636fe0dfbb..b25e278b4264 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -425,6 +425,8 @@ def _persist_event_auth_chain_txn(
         # persisting. We ignore out of band memberships as we're not in the room
         # and won't have their auth chain (we'll fix it up later if we join the
         # room).
+ # + # See: docs/auth_chain_difference_algorithm.md event_ids = {event.event_id for event in events} state_events = [ event diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql index 582fdb31ae11..a12cf97f72c6 100644 --- a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql +++ b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql @@ -34,3 +34,11 @@ CREATE TABLE event_auth_chain_links ( CREATE INDEX ON event_auth_chain_links (origin_chain_id, target_chain_id); + + +-- List of rooms that we haven't calculated the above index for +CREATE TABLE rooms_without_auth_chains_calculated ( + room_id TEXT PRIMARY KEY, +); + +INSERT INTO rooms_without_auth_chains_calculated (room_id) SELECT room_id FROM rooms; From 02d11989a245b38ab281c5c7fc4eafcac54f591c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Dec 2020 10:33:56 +0000 Subject: [PATCH 04/44] Handle old rooms --- .../databases/main/event_federation.py | 22 ++++++++++++++----- synapse/storage/databases/main/events.py | 15 +++++++++++++ synapse/storage/databases/main/room.py | 10 +++++++-- .../schema/delta/58/24_event_auth_chains.sql | 8 ++----- tests/storage/test_event_federation.py | 17 ++++++++++++++ 5 files changed, 58 insertions(+), 14 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 26b4bc063fba..bc80687f7c43 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -15,7 +15,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Dict, Iterable, List, Optional, Set, Tuple +from typing import Dict, Iterable, List, Set, Tuple from synapse.api.errors import StoreError from synapse.events import EventBase @@ -151,11 +151,21 @@ async def get_auth_chain_difference( The set of the difference in auth chains. """ - return await self.db_pool.runInteraction( - "get_auth_chain_difference", - self._get_auth_chain_difference_using_chains_txn, - state_sets, - ) + # Check if we have indexed the room so we can use the chain cover + # algorithm. + room = await self.get_room(room_id) + if room["has_auth_chain_index"]: + return await self.db_pool.runInteraction( + "get_auth_chain_difference_chains", + self._get_auth_chain_difference_using_chains_txn, + state_sets, + ) + else: + return await self.db_pool.runInteraction( + "get_auth_chain_difference", + self._get_auth_chain_difference_txn, + state_sets, + ) def _get_auth_chain_difference_using_chains_txn( self, txn, state_sets: List[Set[str]] diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b25e278b4264..fd2bdedb05bc 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -427,11 +427,26 @@ def _persist_event_auth_chain_txn( # room). # # See: docs/auth_chain_difference_algorithm.md + + # We ignore rooms that we aren't filling the chain cover index for. 
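Note the semantics this relies on: `has_auth_chain_index` is a nullable BOOLEAN, so rooms that predate the migration read back as NULL/None and fall through to the old breadth-first algorithm. A minimal illustration (hypothetical values, not patch code):

```python
# Rooms created after this change are stored with has_auth_chain_index=True;
# pre-existing rooms have NULL, which is falsy once read back from the DB.
old_room = {"room_id": "!old:example.org", "has_auth_chain_index": None}
new_room = {"room_id": "!new:example.org", "has_auth_chain_index": True}

assert not old_room["has_auth_chain_index"]  # old breadth-first walk
assert new_room["has_auth_chain_index"]      # chain cover algorithm
```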
+ rows = self.db_pool.simple_select_many_txn( + txn, + table="rooms", + column="room_id", + iterable={event.room_id for event in events}, + keyvalues={}, + retcols=("room_id", "has_auth_chain_index"), + ) + room_to_index = { + row["room_id"]: bool(row["has_auth_chain_index"]) for row in rows + } + event_ids = {event.event_id for event in events} state_events = [ event for event in events if event.is_state() + and room_to_index[event.room_id] and not event.internal_metadata.is_out_of_band_membership() ] if state_events: diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 6b89db15c967..c7812dc7e8ec 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -84,7 +84,7 @@ async def get_room(self, room_id: str) -> dict: return await self.db_pool.simple_select_one( table="rooms", keyvalues={"room_id": room_id}, - retcols=("room_id", "is_public", "creator"), + retcols=("room_id", "is_public", "creator", "has_auth_chain_index"), desc="get_room", allow_none=True, ) @@ -1184,7 +1184,11 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): table="rooms", keyvalues={"room_id": room_id}, values={"room_version": room_version.identifier}, - insertion_values={"is_public": False, "creator": ""}, + insertion_values={ + "is_public": False, + "creator": "", + "has_auth_chain_index": True, + }, # rooms has a unique constraint on room_id, so no need to lock when doing an # emulated upsert. lock=False, @@ -1219,6 +1223,7 @@ def store_room_txn(txn, next_id): "creator": room_creator_user_id, "is_public": is_public, "room_version": room_version.identifier, + "has_auth_chain_index": True, }, ) if is_public: @@ -1256,6 +1261,7 @@ async def maybe_store_room_on_outlier_membership( "room_version": room_version.identifier, "is_public": False, "creator": "", + "has_auth_chain_index": True, }, # rooms has a unique constraint on room_id, so no need to lock when doing an # emulated upsert. diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql index a12cf97f72c6..b83c64b9c6fc 100644 --- a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql +++ b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql @@ -36,9 +36,5 @@ CREATE TABLE event_auth_chain_links ( CREATE INDEX ON event_auth_chain_links (origin_chain_id, target_chain_id); --- List of rooms that we haven't calculated the above index for -CREATE TABLE rooms_without_auth_chains_calculated ( - room_id TEXT PRIMARY KEY, -); - -INSERT INTO rooms_without_auth_chains_calculated (room_id) SELECT room_id FROM rooms; +-- Whether we've calculated the above index for a room. 
+ALTER TABLE rooms ADD COLUMN has_auth_chain_index BOOLEAN; diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 482506d731c6..b5dc40df78b2 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -159,6 +159,23 @@ def test_auth_difference(self): "j": 1, } + # Mark the room as not having a cover index + + def store_room(txn): + self.store.db_pool.simple_insert_txn( + txn, + "rooms", + { + "room_id": room_id, + "creator": "room_creator_user_id", + "is_public": True, + "room_version": "6", + "has_auth_chain_index": False, + }, + ) + + self.get_success(self.store.db_pool.runInteraction("store_room", store_room)) + # We rudely fiddle with the appropriate tables directly, as that's much # easier than constructing events properly. From 61ab47edaf6912101924fa135d8223a1e2cc8b15 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Dec 2020 10:42:18 +0000 Subject: [PATCH 05/44] Fix schema for sqlite --- .../schema/delta/58/24_event_auth_chains.sql | 6 +++--- .../delta/58/24_event_auth_chains.sql.postgres | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql.postgres diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql index b83c64b9c6fc..ce15020a8124 100644 --- a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql +++ b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql @@ -13,7 +13,7 @@ * limitations under the License. */ -CREATE SEQUENCE IF NOT EXISTS event_auth_chain_id; +-- See docs/auth_chain_difference_algorithm.md CREATE TABLE event_auth_chains ( event_id TEXT PRIMARY KEY, @@ -21,7 +21,7 @@ CREATE TABLE event_auth_chains ( sequence_number BIGINT NOT NULL ); -CREATE UNIQUE INDEX ON event_auth_chains (chain_id, sequence_number); +CREATE UNIQUE INDEX event_auth_chains_c_seq_index ON event_auth_chains (chain_id, sequence_number); CREATE TABLE event_auth_chain_links ( @@ -33,7 +33,7 @@ CREATE TABLE event_auth_chain_links ( ); -CREATE INDEX ON event_auth_chain_links (origin_chain_id, target_chain_id); +CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links (origin_chain_id, target_chain_id); -- Whether we've calculated the above index for a room. diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql.postgres b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql.postgres new file mode 100644 index 000000000000..e8a035bbeb1b --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql.postgres @@ -0,0 +1,16 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+CREATE SEQUENCE IF NOT EXISTS event_auth_chain_id;
From 6141825ae6abdcd0b8e2b0eb5a1e38b066dd184e Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Thu, 3 Dec 2020 11:52:26 +0000
Subject: [PATCH 06/44] Fix up _get_auth_chain_difference_using_chains_txn

---
 synapse/storage/database.py                   |  3 +
 .../databases/main/event_federation.py        | 95 ++++++++++++-------
 2 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index e4a2b2fabb74..145dee524ffd 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -268,6 +268,9 @@ def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None:
             self.execute(sql, val)
 
     def execute_values(self, sql: str, *args: Any, **kwargs) -> None:
+        """Corresponds to psycopg2.extras.execute_values. Only available when
+        using postgres.
+        """
         assert isinstance(self.database_engine, PostgresEngine)
         from psycopg2.extras import execute_values  # type: ignore
 
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index bc80687f7c43..c86be13cb080 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -24,6 +24,7 @@
 from synapse.storage.database import DatabasePool, LoggingTransaction
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.signatures import SignatureWorkerStore
+from synapse.storage.engines import PostgresEngine
 from synapse.types import Collection
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.lrucache import LruCache
@@ -175,13 +176,20 @@ def _get_auth_chain_difference_using_chains_txn(
         See docs/auth_chain_difference_algorithm.md for details
         """
 
+        # First we look up the chain ID/sequence numbers for all the events, and
+        # work out the chain/sequence numbers reachable from each state set.
+
         initial_events = set(state_sets[0]).union(*state_sets[1:])
 
-        chain_info = {}
-        chain_to_event = {}
-        seen_chains = set()
+        # Map from event_id -> (chain ID, seq no)
+        chain_info = {}  # type: Dict[str, Tuple[int, int]]
+
+        # Map from chain ID -> seq no -> event ID
+        chain_to_event = {}  # type: Dict[int, Dict[int, str]]
 
-        # FIXME: Need to handle chains that point to chains not in state sets
+        # All the chains that we've found that are reachable from the state
+        # sets.
+        seen_chains = set()  # type: Set[int]
 
         sql = """
             SELECT event_id, chain_id, sequence_number
@@ -199,9 +207,13 @@ def _get_auth_chain_difference_using_chains_txn(
                 seen_chains.add(chain_id)
                 chain_to_event.setdefault(chain_id, {})[sequence_number] = event_id
 
-        set_to_chain = {}
-        for set_id, state_set in enumerate(state_sets):
-            chains = set_to_chain.setdefault(set_id, {})
+        # Corresponds to `state_sets`, except as a map from chain ID to max
+        # sequence number reachable from the state set.
+        set_to_chain = []  # type: List[Dict[int, int]]
+        for state_set in state_sets:
+            chains = {}
+            set_to_chain.append(chains)
+
             for event_id in state_set:
                 chain_id, seq_no = chain_info[event_id]
 
@@ -209,6 +221,8 @@ def _get_auth_chain_difference_using_chains_txn(
                 if curr < seq_no:
                     chains[chain_id] = seq_no
 
+        # Now we look up all links for the chains we have, adding chains to
+        # set_to_chain that are reachable from each set.
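For orientation, here is roughly what `set_to_chain` holds at this point for the worked example in the new doc (illustrative values only; real chain IDs are whatever the sequence allocated):

```python
# Direct positions of the state events, before any links are followed:
# each entry maps chain ID -> max sequence number seen in that state set.
set_to_chain = [
    {4: 1, 2: 2},  # S1: Alice's invite (4,1) and Bob's second join (2,2)
    {4: 3, 2: 1},  # S2: Alice's second join (4,3) and Bob's first join (2,1)
]

# The link pass below then extends each map with the chains reachable from
# those positions, giving {1: 1, 2: 2, 3: 1, 4: 1} for S1 and
# {1: 1, 2: 1, 3: 2, 4: 3} for S2, after which the per-chain (min, max]
# gaps pick out chains 2, 3 and 4.
```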
sql = """ SELECT origin_chain_id, origin_sequence_number, @@ -217,7 +231,6 @@ def _get_auth_chain_difference_using_chains_txn( WHERE %s """ - # chain_links = {} for batch in batch_iter(seen_chains, 1000): clause, args = make_in_list_sql_clause( txn.database_engine, "origin_chain_id", batch @@ -230,13 +243,10 @@ def _get_auth_chain_difference_using_chains_txn( target_chain_id, target_sequence_number, ) in txn: - # chain_links.setdefault( - # (origin_chain_id, origin_sequence_number), [] - # ).append((target_chain_id, target_sequence_number)) - - # TODO: Handle this case, its valid for it to happen. - # DOES THIS WORK? - for chains in set_to_chain.values(): + for chains in set_to_chain: + # chains are only reachable if the origin sequence number of + # the link is less than the max sequence number in the + # origin chain. if origin_sequence_number <= chains.get(chain_id, 0): curr = chains.setdefault( target_chain_id, target_sequence_number @@ -244,21 +254,26 @@ def _get_auth_chain_difference_using_chains_txn( if curr < target_sequence_number: chains[target_chain_id] = target_sequence_number + # Now for each chain we figure out the maximum sequence number reachable + # from *any* state set and the minimum sequence number reachable from + # *all* state sets. Events in that range are in the auth chain + # difference. result = set() chain_to_gap = {} for chain_id in seen_chains: - min_seq_no = min( - chains.get(chain_id, 0) for chains in set_to_chain.values() - ) + min_seq_no = min(chains.get(chain_id, 0) for chains in set_to_chain) max_seq_no = 0 - for chains in set_to_chain.values(): + for chains in set_to_chain: s = chains.get(chain_id) if s: max_seq_no = max(max_seq_no, s) if min_seq_no < max_seq_no: + # We have a non empty gap, try and fill it from the events that + # we have, otherwise add them to the list of gaps to pull out + # from the DB. for seq_no in range(min_seq_no + 1, max_seq_no + 1): event_id = chain_to_event[chain_id].get(seq_no) if event_id: @@ -267,21 +282,37 @@ def _get_auth_chain_difference_using_chains_txn( chain_to_gap[chain_id] = (min_seq_no, max_seq_no) break - sql = """ - SELECT event_id - FROM event_auth_chains AS c, (VALUES ?) AS l(chain_id, min_seq, max_seq) - WHERE - c.chain_id = l.chain_id - AND min_seq < sequence_number AND sequence_number <= max_seq - """ + if not chain_to_gap: + # If there are no gaps to fetch, we're done! + return result - args = [ - (chain_id, min_no, max_no) - for chain_id, (min_no, max_no) in chain_to_gap.items() - ] + if isinstance(self.database_engine, PostgresEngine): + # We can use `execute_values` to efficiently fetch the gaps when + # using postgres. + sql = """ + SELECT event_id + FROM event_auth_chains AS c, (VALUES ?) AS l(chain_id, min_seq, max_seq) + WHERE + c.chain_id = l.chain_id + AND min_seq < sequence_number AND sequence_number <= max_seq + """ + + args = [ + (chain_id, min_no, max_no) + for chain_id, (min_no, max_no) in chain_to_gap.items() + ] - rows = txn.execute_values(sql, args, fetch=True) - result.update(r for r, in rows) + rows = txn.execute_values(sql, args, fetch=True) + result.update(r for r, in rows) + else: + # For SQLite we just fall back to doing a noddy for loop. + sql = """ + SELECT event_id FROM event_auth_chains + WHERE chain_id = ? AND ? < sequence_number AND sequence_number <= ? 
+ """ + for chain_id, (min_no, max_no) in chain_to_gap.items(): + txn.execute(sql, (chain_id, min_no, max_no)) + result.update(r for r, in txn) return result From c7e2ce5ebf092b3195d4ea2045d12b33da9a668c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Dec 2020 11:57:26 +0000 Subject: [PATCH 07/44] Newsfile --- changelog.d/8868.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/8868.misc diff --git a/changelog.d/8868.misc b/changelog.d/8868.misc new file mode 100644 index 000000000000..1a11e3094457 --- /dev/null +++ b/changelog.d/8868.misc @@ -0,0 +1 @@ +Improve efficiency of large state resolutions for new rooms. From 66e779d51fa0d01f0764972dfd311e954f4c6316 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Dec 2020 13:07:53 +0000 Subject: [PATCH 08/44] Add type --- synapse/storage/databases/main/event_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index c86be13cb080..58c604386412 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -211,7 +211,7 @@ def _get_auth_chain_difference_using_chains_txn( # sequence number reachable from the state set. set_to_chain = [] # type: List[Dict[int, int]] for state_set in state_sets: - chains = {} + chains = {} # type: Dict[int, int] set_to_chain.append(chains) for event_id in state_set: From cf2243fc24eab10e14c4960e1deec53c48f375ca Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Dec 2020 18:13:29 +0000 Subject: [PATCH 09/44] Fixup --- .../databases/main/event_federation.py | 6 +- synapse/storage/databases/main/events.py | 308 +++++++++++------- synapse/storage/persist_events.py | 5 + tests/storage/test_event_federation.py | 81 +++-- 4 files changed, 254 insertions(+), 146 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 58c604386412..68c52a3bebcf 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -247,13 +247,15 @@ def _get_auth_chain_difference_using_chains_txn( # chains are only reachable if the origin sequence number of # the link is less than the max sequence number in the # origin chain. - if origin_sequence_number <= chains.get(chain_id, 0): + if origin_sequence_number <= chains.get(origin_chain_id, 0): curr = chains.setdefault( target_chain_id, target_sequence_number ) if curr < target_sequence_number: chains[target_chain_id] = target_sequence_number + seen_chains.add(target_chain_id) + # Now for each chain we figure out the maximum sequence number reachable # from *any* state set and the minimum sequence number reachable from # *all* state sets. Events in that range are in the auth chain @@ -275,7 +277,7 @@ def _get_auth_chain_difference_using_chains_txn( # we have, otherwise add them to the list of gaps to pull out # from the DB. 
for seq_no in range(min_seq_no + 1, max_seq_no + 1): - event_id = chain_to_event[chain_id].get(seq_no) + event_id = chain_to_event.get(chain_id, {}).get(seq_no) if event_id: result.add(event_id) else: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index fd2bdedb05bc..91da3580cc54 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -17,7 +17,17 @@ import itertools import logging from collections import OrderedDict, namedtuple -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generator, + Iterable, + List, + Optional, + Set, + Tuple, +) import attr from prometheus_client import Counter @@ -400,6 +410,7 @@ def _persist_events_txn( def _persist_event_auth_chain_txn( self, txn: LoggingTransaction, events: List[EventBase], ): + # We want to store event_auth mappings for rejected events, as they're # used in state res v2. # This is only necessary if the rejected event appears in an accepted @@ -524,6 +535,7 @@ def _persist_event_auth_chain_txn( # We need to do this in a topologically sorted order as we want to # generate chain IDs/sequence numbers of an event's auth events # before the event itself. + chains_ids_allocated = set() for event_id in sorted_topologically( events_to_calc_chain_id_for, event_to_auth_chain ): @@ -536,22 +548,35 @@ def _persist_event_auth_chain_txn( if existing_chain_id: # We found a chain ID/sequence number candidate, check its # not already taken. - row = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_auth_chains", - keyvalues={ - "chain_id": existing_chain_id[0], - "sequence_number": existing_chain_id[1] + 1, - }, - retcol="event_id", - allow_none=True, - ) - if not row: - new_chain_id = (existing_chain_id[0], existing_chain_id[1] + 1) + if ( + existing_chain_id[0], + existing_chain_id[1] + 1, + ) not in chains_ids_allocated: + row = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_auth_chains", + keyvalues={ + "chain_id": existing_chain_id[0], + "sequence_number": existing_chain_id[1] + 1, + }, + retcol="event_id", + allow_none=True, + ) + if row: + chains_ids_allocated.add( + (existing_chain_id[0], existing_chain_id[1] + 1,) + ) + else: + new_chain_id = ( + existing_chain_id[0], + existing_chain_id[1] + 1, + ) if not new_chain_id: new_chain_id = (self._event_chain_id_gen.get_next_id_txn(txn), 1) + chains_ids_allocated.add(new_chain_id) + chain_map[event_id] = new_chain_id new_chains[event_id] = new_chain_id @@ -581,16 +606,13 @@ def _persist_event_auth_chain_txn( # discarding any that are reachable by other auth events, or # that have the same chain ID as the event. # 2. For each retained auth event we: - # 1. propose adding a link from the event's to the auth - # event's chain ID/sequence number; and - # 2. propose adding a link from the event to every chain - # reachable by the auth event. - # 3. Filter redundant links from the list of proposed links. - # 4. Persist the new links - # + # a. Add a link from the event's to the auth event's chain + # ID/sequence number; and + # b. Add a link from the event to every chain reachable by the + # auth event. 
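Concretely, steps 2a and 2b amount to the following (a sketch with hypothetical chain positions, not the patch's code):

```python
# New event E is assigned (7, 1); it retains one auth event A at (2, 3),
# and A's chain can already reach (5, 1).
links_from_auth = [(5, 1)]

new_links = [((7, 1), (2, 3))]  # step 2a: direct link to A's position
new_links += [((7, 1), target) for target in links_from_auth]  # step 2b

# A single hop from E's chain now covers everything A could reach:
assert new_links == [((7, 1), (2, 3)), ((7, 1), (5, 1))]
```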
# Step 1, fetch all existing links - chain_links = {} # type: Dict[Tuple[int, int], Set[Tuple[int, int]]] + chain_links = _LinkMap() rows = self.db_pool.simple_select_many_txn( txn, table="event_auth_chain_links", @@ -605,107 +627,54 @@ def _persist_event_auth_chain_txn( ), ) for row in rows: - chain_links.setdefault( - (row["origin_chain_id"], row["target_chain_id"]), set() - ).add((row["origin_sequence_number"], row["target_sequence_number"]),) + chain_links.add_link( + row["origin_chain_id"], + row["origin_sequence_number"], + row["target_chain_id"], + row["target_sequence_number"], + new=False, + ) - to_add = {} # type: Dict[Tuple[int, int], Set[Tuple[int, int]]] - for event_id in events_to_calc_chain_id_for: + # We do this in toplogical order to avoid adding redundant links. + for event_id in sorted_topologically( + events_to_calc_chain_id_for, event_to_auth_chain + ): chain_id, sequence_number = chain_map[event_id] # Filter out auth events that are reachable by other auth # events. We do this by looking at every permutation of pairs of - # auth events (A, B) to check if B is reachable from B. - reduction = set(event_to_auth_chain[event_id]) + # auth events (A, B) to check if B is reachable from A. + reduction = { + a_id + for a_id in event_to_auth_chain[event_id] + if chain_map[a_id][0] != chain_id + } for start_auth_id, end_auth_id in itertools.permutations( - [ - auth_id - for auth_id in event_to_auth_chain[event_id] - if auth_id in chain_map - ], - r=2, + event_to_auth_chain[event_id], r=2, ): - source_chain_id, source_seq_no = chain_map[start_auth_id] - target_chain_id, target_seq_no = chain_map[end_auth_id] - - if source_chain_id == chain_id: - # Discard auth events with in same chain. - reduction.discard(start_auth_id) - - links = chain_links.get((source_chain_id, target_chain_id), set()) - for link_start_seq, link_end_seq in links: - if ( - link_start_seq <= source_seq_no - and target_seq_no <= link_end_seq - ): - reduction.discard(end_auth_id) - break + if chain_links.exists_path_from( + *chain_map[start_auth_id], *chain_map[end_auth_id] + ): + reduction.discard(end_auth_id) # Step 2, figure out what the new links are from the reduced # list of auth events. for auth_id in reduction: auth_chain_id, auth_sequence_number = chain_map[auth_id] - # Step 2a, add link from event -> auth event - links = chain_links.setdefault((chain_id, auth_chain_id), set()) - links.add((sequence_number, auth_sequence_number)) - - # Step 2b, lookup up all links from auth events and add them - # as links from the event. - for (source_id, target_id), auth_links in chain_links.items(): - if source_id != auth_chain_id or target_id == chain_id: - continue - - for (source_seq, target_seq) in auth_links: - if source_seq <= auth_sequence_number: - to_add.setdefault((chain_id, target_id), set()).add( - (sequence_number, target_seq) - ) - - for key, values in to_add.items(): - chain_links.setdefault(key, set()).update(values) + # Step 2a, add link between the event and auth event + chain_links.add_link( + chain_id, sequence_number, auth_chain_id, auth_sequence_number + ) - # Step 3, filter out redundant links. - # - # We do this by comparing a proposed link with all existing links - # and other proposed links. - # - # Note: new links won't cause existing links in the DB to become - # redundant, as new links must start at newly persisted events - # (which won't be reachable by any existing events). 
- filtered_links = {} # type: Dict[Tuple[int, int], Set[Tuple[int, int]]] - for ((source_chain, target_chain), links) in to_add.items(): - filtered_chain_links = filtered_links.setdefault( - (source_chain, target_chain), set() - ) - for link_to_add in links: - for existing_link in chain_links[(source_chain, target_chain)]: - # If a link "crosses" another link then its redundant. - # For example in the following link 1 (L1) is redundant, - # as any event reachable via L1 is *also* reachable via - # L2. - # - # Chain A Chain B - # | | - # L1 |------ | - # | | | - # L2 |---- | -->| - # | | | - # | |--->| - # | | - # | | - # - # So we only need to keep links which *do not* cross, - # i.e. links that both start and end above or below an - # existing link. - if ( - link_to_add[0] < existing_link[0] - and link_to_add[1] < existing_link[1] - ) or ( - link_to_add[0] > existing_link[0] - and link_to_add[1] > existing_link[1] - ): - filtered_chain_links.add(link_to_add) + # Step 2b, add a link to chains reachable from the auth + # event. + for target_id, target_seq in chain_links.get_links_from( + auth_chain_id, auth_sequence_number + ): + chain_links.add_link( + chain_id, sequence_number, target_id, target_seq + ) self.db_pool.simple_insert_many_txn( txn, @@ -713,12 +682,16 @@ def _persist_event_auth_chain_txn( values=[ { "origin_chain_id": source_id, - "origin_sequence_number": target_id, - "target_chain_id": source_seq, + "origin_sequence_number": source_seq, + "target_chain_id": target_id, "target_sequence_number": target_seq, } - for (source_id, target_id), sequences in filtered_links.items() - for source_seq, target_seq in sequences + for ( + source_id, + source_seq, + target_id, + target_seq, + ) in chain_links.get_additions() ], ) @@ -1835,3 +1808,114 @@ def _update_backward_extremeties(self, txn, events): if not ev.internal_metadata.is_outlier() ], ) + + +@attr.s +class _LinkMap: + """A helper type for tracking links between chains. + """ + + maps = attr.ib(type=Dict[int, Dict[int, Dict[int, int]]], factory=dict) + additions = attr.ib(type=Set[Tuple[int, int, int, int]], factory=set) + + def add_link( + self, + src_chain: int, + src_seq: int, + target_chain: int, + target_seq: int, + new=True, + ): + """Add a new link between two chains, ensuring no redundant links are added. + + New links should be added in topological order. + + Args: + src_chain, + src_seq, + target_chain, + target_seq, + new (bool): Whether this is a "new" link, i.e. should it be returned + by `get_additions`. + """ + current_links = self.maps.setdefault(src_chain, {}).setdefault(target_chain, {}) + + if new: + # Check if the new link is redundant + for current_seq_src, current_seq_target in current_links.items(): + # If a link "crosses" another link then its redundant. For example + # in the following link 1 (L1) is redundant, as any event reachable + # via L1 is *also* reachable via L2. + # + # Chain A Chain B + # | | + # L1 |------ | + # | | | + # L2 |---- | -->| + # | | | + # | |--->| + # | | + # | | + # + # So we only need to keep links which *do not* cross, i.e. links + # that both start and end above or below an existing link. + # + # Note, since we add links in topological ordering we should never + # see `src_seq` less than `current_seq_src`. + + if current_seq_src <= src_seq and target_seq <= current_seq_target: + # This new link is redundant, nothing to do. 
+ return + + self.additions.add((src_chain, src_seq, target_chain, target_seq)) + + current_links[src_seq] = target_seq + + def get_links_from( + self, source_id, src_seq + ) -> Generator[Tuple[int, int], None, None]: + """Gets the chains reachable from the given chain/sequence number. + + Yields: + The chain ID and sequence number the link points to. + """ + for target_id, sequence_numbers in self.maps.get(source_id, {}).items(): + for link_src_seq, target_seq in sequence_numbers.items(): + if link_src_seq <= src_seq: + yield target_id, target_seq + + def get_links_between( + self, source_chain: int, target_chain: int + ) -> Generator[Tuple[int, int], None, None]: + """Gets the links between two chains. + + Yields: + The source and target sequence numbers. + """ + + yield from self.maps.get(source_chain, {}).get(target_chain, {}).items() + + def get_additions(self) -> Generator[Tuple[int, int, int, int], None, None]: + """Gets any newly added links. + + Yields: + The source chain ID/sequence number and target chain ID/sequence number + """ + + for src_chain, src_seq, target_chain, _ in self.additions: + target_seq = self.maps.get(src_chain, {}).get(target_chain, {}).get(src_seq) + if target_seq is not None: + yield (src_chain, src_seq, target_chain, target_seq) + + def exists_path_from( + self, src_chain: int, src_seq: int, target_chain: int, target_seq: int, + ) -> bool: + """Checks if there is a path between the source chain ID/sequence and + target chain ID/sequence. + """ + links = self.get_links_between(src_chain, target_chain) + for link_start_seq, link_end_seq in links: + if link_start_seq <= src_seq and target_seq <= link_end_seq: + return True + + return False diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 70e636b0bac0..00d1468dab1f 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -388,6 +388,8 @@ async def _persist_events( (event, context) ) + logger.info("Persisting event: %s", event) + for room_id, ev_ctx_rm in events_by_room.items(): latest_event_ids = await self.main_store.get_latest_event_ids_in_room( room_id @@ -401,6 +403,9 @@ async def _persist_events( # No change in extremities, so no change in state continue + logger.info("Old extrem: %s", latest_event_ids) + logger.info("New extrem: %s", new_latest_event_ids) + # there should always be at least one forward extremity. # (except during the initial persistence of the send_join # results, in which case there will be no existing diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index b5dc40df78b2..ba1b6606f20d 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import attr + +from synapse.events import _EventInternalMetadata + import tests.unittest import tests.utils @@ -170,7 +174,7 @@ def store_room(txn): "creator": "room_creator_user_id", "is_public": True, "room_version": "6", - "has_auth_chain_index": False, + "has_auth_chain_index": True, }, ) @@ -179,42 +183,37 @@ def store_room(txn): # We rudely fiddle with the appropriate tables directly, as that's much # easier than constructing events properly. 
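The "crossing" test that `_LinkMap.add_link` applies can be exercised standalone. A minimal sketch, assuming (as in the class above) that the links between a pair of chains are kept as a map from origin sequence number to target sequence number:

```python
from typing import Dict


def is_redundant(existing: Dict[int, int], src_seq: int, target_seq: int) -> bool:
    """A candidate link is redundant iff some existing link between the same
    two chains starts no later and reaches at least as far."""
    return any(
        cur_src <= src_seq and target_seq <= cur_target
        for cur_src, cur_target in existing.items()
    )


links = {3: 2}  # existing link: usable from seq 3, reaches target seq 2
assert is_redundant(links, 4, 2)      # starts later, reaches no further
assert is_redundant(links, 4, 1)      # starts later, reaches less far
assert not is_redundant(links, 4, 3)  # reaches further: must be kept
assert not is_redundant(links, 2, 1)  # usable earlier: crosses, keep it
```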
- def insert_event(txn, event_id, stream_ordering): - - depth = depth_map[event_id] - - self.store.db_pool.simple_insert_txn( - txn, - table="events", - values={ - "event_id": event_id, - "room_id": room_id, - "depth": depth, - "topological_ordering": depth, - "type": "m.test", - "processed": True, - "outlier": False, - "stream_ordering": stream_ordering, - }, - ) + def insert_event(txn): + stream_ordering = 0 + + for event_id in auth_graph: + stream_ordering += 1 + depth = depth_map[event_id] + + self.store.db_pool.simple_insert_txn( + txn, + table="events", + values={ + "event_id": event_id, + "room_id": room_id, + "depth": depth, + "topological_ordering": depth, + "type": "m.test", + "processed": True, + "outlier": False, + "stream_ordering": stream_ordering, + }, + ) - self.store.db_pool.simple_insert_many_txn( + self.hs.datastores.persist_events._persist_event_auth_chain_txn( txn, - table="event_auth", - values=[ - {"event_id": event_id, "room_id": room_id, "auth_id": a} - for a in auth_graph[event_id] + [ + FakeEvent(event_id, room_id, auth_graph[event_id]) + for event_id in auth_graph ], ) - next_stream_ordering = 0 - for event_id in auth_graph: - next_stream_ordering += 1 - self.get_success( - self.store.db_pool.runInteraction( - "insert", insert_event, event_id, next_stream_ordering - ) - ) + self.get_success(self.store.db_pool.runInteraction("insert", insert_event,)) # Now actually test that various combinations give the right result: @@ -257,3 +256,21 @@ def insert_event(txn, event_id, stream_ordering): self.store.get_auth_chain_difference(room_id, [{"a"}]) ) self.assertSetEqual(difference, set()) + + +@attr.s +class FakeEvent: + event_id = attr.ib() + room_id = attr.ib() + auth_events = attr.ib() + + type = "foo" + state_key = "foo" + + internal_metadata = _EventInternalMetadata({}) + + def auth_event_ids(self): + return self.auth_events + + def is_state(self): + return True From bd30c9ed122765a60ed44a43eb6c24fb7bbdf77f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Dec 2020 11:41:52 +0000 Subject: [PATCH 10/44] Fix take1 --- synapse/storage/databases/main/events.py | 76 +++++++++++++++++-- .../schema/delta/58/24_event_auth_chains.sql | 11 +++ 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 91da3580cc54..8fa311a70f16 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -452,13 +452,11 @@ def _persist_event_auth_chain_txn( row["room_id"]: bool(row["has_auth_chain_index"]) for row in rows } - event_ids = {event.event_id for event in events} + event_ids = {event.event_id: event for event in events} state_events = [ event for event in events - if event.is_state() - and room_to_index[event.room_id] - and not event.internal_metadata.is_out_of_band_membership() + if event.is_state() and room_to_index[event.room_id] ] if state_events: chain_map = {} @@ -468,6 +466,32 @@ def _persist_event_auth_chain_txn( events_to_calc_chain_id_for = set(event_to_types) event_to_auth_chain = {e.event_id: e.auth_event_ids() for e in state_events} + rows = self.db_pool.simple_select_many_txn( + txn, + table="event_auth_chain_to_calculate", + keyvalues={}, + column="room_id", + iterable={e.room_id for e in state_events}, + retcols=("event_id", "type", "state_key"), + ) + for row in rows: + event_id = row["event_id"] + etype = row["type"] + state_key = row["state_key"] + + auth_events = self.db_pool.simple_select_onecol_txn( + txn, + 
"event_auth", + keyvalues={"event_id": event_id}, + retcol="auth_id", + ) + if set(auth_events) - events_to_calc_chain_id_for: + continue + + events_to_calc_chain_id_for.add(event_id) + event_to_types[event_id] = (etype, state_key) + event_to_auth_chain[event_id] = auth_events + # First we get the chain ID and sequence numbers for the events' # auth events (that aren't also currently being persisted). # @@ -478,9 +502,9 @@ def _persist_event_auth_chain_txn( missing_auth_chains = { a_id - for e in state_events - for a_id in e.auth_event_ids() - if a_id not in event_ids + for auth_events in event_to_auth_chain.values() + for a_id in auth_events + if a_id not in events_to_calc_chain_id_for } # We loop here in case we find an out of band membership and need to @@ -526,6 +550,36 @@ def _persist_event_auth_chain_txn( else: chain_map[auth_id] = (chain_id, sequence_number) + for event_id in sorted_topologically( + event_to_auth_chain, event_to_auth_chain + ): + # if not event.internal_metadata.is_out_of_band_membership(): + # continue + + for auth_id in event_to_auth_chain[event_id]: + if ( + auth_id not in chain_map + and auth_id not in events_to_calc_chain_id_for + ): + events_to_calc_chain_id_for.discard(event_id) + + if event_id in event_ids: + event = event_ids[event_id] + self.db_pool.simple_insert_txn( + txn, + table="event_auth_chain_to_calculate", + values={ + "event_id": event.event_id, + "room_id": event.room_id, + "type": event.type, + "state_key": event.state_key, + }, + ) + break + + if not events_to_calc_chain_id_for: + return + # We now calculate the chain IDs/sequence numbers for the events. We # do this by looking at the chain ID and sequence number of any auth # event with the same type/state_key and incrementing the sequence @@ -589,6 +643,14 @@ def _persist_event_auth_chain_txn( ], ) + self.db_pool.simple_delete_many_txn( + txn, + table="event_auth_chain_to_calculate", + keyvalues={}, + column="event_id", + iterable=new_chains, + ) + # Now we need to calculate any new links between chains caused by # the new events. # diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql index ce15020a8124..31e948acb992 100644 --- a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql +++ b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql @@ -36,5 +36,16 @@ CREATE TABLE event_auth_chain_links ( CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links (origin_chain_id, target_chain_id); +-- Events that we have persisted but not calculated auth chains for. +CREATE TABLE event_auth_chain_to_calculate ( + event_id TEXT PRIMARY KEY, + room_id TEXT NOT NULL, + type TEXT NOT NULL, + state_key TEXT NOT NULL +); + +CREATE INDEX event_auth_chain_to_calculate_rm_id ON event_auth_chain_to_calculate(room_id); + + -- Whether we've calculated the above index for a room. 
ALTER TABLE rooms ADD COLUMN has_auth_chain_index BOOLEAN; From 55f03b9b046b754987e70b85f0b05d5dfcb2fda0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Dec 2020 14:48:32 +0000 Subject: [PATCH 11/44] Fixup --- synapse/storage/databases/main/events.py | 553 ++++++++++++----------- 1 file changed, 282 insertions(+), 271 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8fa311a70f16..b49d003f735e 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -448,314 +448,325 @@ def _persist_event_auth_chain_txn( keyvalues={}, retcols=("room_id", "has_auth_chain_index"), ) - room_to_index = { + room_to_is_using_chain_index = { row["room_id"]: bool(row["has_auth_chain_index"]) for row in rows } - event_ids = {event.event_id: event for event in events} - state_events = [ - event + state_events = { + event.event_id: event for event in events - if event.is_state() and room_to_index[event.room_id] - ] - if state_events: - chain_map = {} - new_chains = {} + if event.is_state() and room_to_is_using_chain_index[event.room_id] + } - event_to_types = {e.event_id: (e.type, e.state_key) for e in state_events} - events_to_calc_chain_id_for = set(event_to_types) - event_to_auth_chain = {e.event_id: e.auth_event_ids() for e in state_events} + if not state_events: + return - rows = self.db_pool.simple_select_many_txn( - txn, - table="event_auth_chain_to_calculate", - keyvalues={}, - column="room_id", - iterable={e.room_id for e in state_events}, - retcols=("event_id", "type", "state_key"), - ) - for row in rows: - event_id = row["event_id"] - etype = row["type"] - state_key = row["state_key"] + # Map from event ID to chain ID/sequence number. + chain_map = {} # type: Dict[str, Tuple[int, int]] - auth_events = self.db_pool.simple_select_onecol_txn( - txn, - "event_auth", - keyvalues={"event_id": event_id}, - retcol="auth_id", - ) - if set(auth_events) - events_to_calc_chain_id_for: - continue + # We need to know the type/state_key and auth events of the events we're + # calculating chain IDs for. We don't rely on having the full Event + # instances as we'll potentially be pulling more events from the DB and + # we don't need the overhead of fetching/parsing the full event JSON. + event_to_types = { + e.event_id: (e.type, e.state_key) for e in state_events.values() + } + event_to_auth_chain = { + e.event_id: e.auth_event_ids() for e in state_events.values() + } - events_to_calc_chain_id_for.add(event_id) - event_to_types[event_id] = (etype, state_key) - event_to_auth_chain[event_id] = auth_events + # Set of event IDs to calculate chain ID/seq numbers for. + events_to_calc_chain_id_for = set(state_events) - # First we get the chain ID and sequence numbers for the events' - # auth events (that aren't also currently being persisted). - # - # Note that there there is an edge case here where we might not have - # calculated chains and sequence numbers for events that were "out - # of band". We handle this case by fetching the necessary info and - # adding it to the set of events to calculate chain IDs for. + # We check if there are any events for the room that need to be handled. + # These should just be out of band memberships, where we didn't have the + # auth chain when we first persisted. 
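Further down, once every auth event has a chain position, the events themselves get positions via the same-type/state_key rule described there. Sketched with made-up chain numbers (not the patch's code):

```python
from typing import Dict, Set, Tuple


def allocate(
    type_key: Tuple[str, str],
    auth_positions: Dict[Tuple[str, str], Tuple[int, int]],
    taken: Set[Tuple[int, int]],
    fresh_chain_id: int,
) -> Tuple[int, int]:
    # Extend the chain of an auth event with the same (type, state_key) if
    # the next sequence number is free; otherwise start a new chain at 1.
    candidate = auth_positions.get(type_key)
    if candidate is not None:
        chain_id, seq = candidate
        if (chain_id, seq + 1) not in taken:
            return (chain_id, seq + 1)
    return (fresh_chain_id, 1)


taken = {(2, 1), (2, 2)}
# A third membership event for the same user extends chain 2:
assert allocate(
    ("m.room.member", "@bob:test"),
    {("m.room.member", "@bob:test"): (2, 2)},
    taken,
    fresh_chain_id=9,
) == (2, 3)
```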
+ rows = self.db_pool.simple_select_many_txn( + txn, + table="event_auth_chain_to_calculate", + keyvalues={}, + column="room_id", + iterable={e.room_id for e in state_events.values()}, + retcols=("event_id", "type", "state_key"), + ) + for row in rows: + event_id = row["event_id"] + event_type = row["type"] + state_key = row["state_key"] - missing_auth_chains = { - a_id - for auth_events in event_to_auth_chain.values() - for a_id in auth_events - if a_id not in events_to_calc_chain_id_for - } + auth_events = self.db_pool.simple_select_onecol_txn( + txn, "event_auth", keyvalues={"event_id": event_id}, retcol="auth_id", + ) - # We loop here in case we find an out of band membership and need to - # fetch their auth event info. - while missing_auth_chains: - sql = """ - SELECT event_id, events.type, state_key, chain_id, sequence_number - FROM events - INNER JOIN state_events USING (event_id) - LEFT JOIN event_auth_chains USING (event_id) - WHERE - """ - clause, args = make_in_list_sql_clause( - txn.database_engine, "event_id", missing_auth_chains, - ) - txn.execute(sql + clause, args) + events_to_calc_chain_id_for.add(event_id) + event_to_types[event_id] = (event_type, state_key) + event_to_auth_chain[event_id] = auth_events - missing_auth_chains.clear() + # First we get the chain ID and sequence numbers for the events' + # auth events (that aren't also currently being persisted). + # + # Note that there there is an edge case here where we might not have + # calculated chains and sequence numbers for events that were "out + # of band". We handle this case by fetching the necessary info and + # adding it to the set of events to calculate chain IDs for. + + missing_auth_chains = { + a_id + for auth_events in event_to_auth_chain.values() + for a_id in auth_events + if a_id not in events_to_calc_chain_id_for + } - for auth_id, etype, state_key, chain_id, sequence_number in txn: - event_to_types[auth_id] = (etype, state_key) + # We loop here in case we find an out of band membership and need to + # fetch their auth event info. + while missing_auth_chains: + sql = """ + SELECT event_id, events.type, state_key, chain_id, sequence_number + FROM events + INNER JOIN state_events USING (event_id) + LEFT JOIN event_auth_chains USING (event_id) + WHERE + """ + clause, args = make_in_list_sql_clause( + txn.database_engine, "event_id", missing_auth_chains, + ) + txn.execute(sql + clause, args) - if chain_id is None: - # No chain ID, so the event was persisted out of band. - # We add to list of events to calculate auth chains for. + missing_auth_chains.clear() - events_to_calc_chain_id_for.add(auth_id) + for auth_id, event_type, state_key, chain_id, sequence_number in txn: + event_to_types[auth_id] = (event_type, state_key) - event_to_auth_chain[ - auth_id - ] = self.db_pool.simple_select_onecol_txn( - txn, - "event_auth", - keyvalues={"event_id": auth_id}, - retcol="auth_id", - ) + if chain_id is None: + # No chain ID, so the event was persisted out of band. + # We add to list of events to calculate auth chains for. 
- missing_auth_chains.update( - e - for e in event_to_auth_chain[auth_id] - if e not in event_to_types - ) - else: - chain_map[auth_id] = (chain_id, sequence_number) + events_to_calc_chain_id_for.add(auth_id) - for event_id in sorted_topologically( - event_to_auth_chain, event_to_auth_chain - ): - # if not event.internal_metadata.is_out_of_band_membership(): - # continue - - for auth_id in event_to_auth_chain[event_id]: - if ( - auth_id not in chain_map - and auth_id not in events_to_calc_chain_id_for - ): - events_to_calc_chain_id_for.discard(event_id) - - if event_id in event_ids: - event = event_ids[event_id] - self.db_pool.simple_insert_txn( - txn, - table="event_auth_chain_to_calculate", - values={ - "event_id": event.event_id, - "room_id": event.room_id, - "type": event.type, - "state_key": event.state_key, - }, - ) - break - - if not events_to_calc_chain_id_for: - return + event_to_auth_chain[ + auth_id + ] = self.db_pool.simple_select_onecol_txn( + txn, + "event_auth", + keyvalues={"event_id": auth_id}, + retcol="auth_id", + ) - # We now calculate the chain IDs/sequence numbers for the events. We - # do this by looking at the chain ID and sequence number of any auth - # event with the same type/state_key and incrementing the sequence - # number by one. If there was no match or the chain ID/sequence - # number is already taken we generate a new chain. - # - # We need to do this in a topologically sorted order as we want to - # generate chain IDs/sequence numbers of an event's auth events - # before the event itself. - chains_ids_allocated = set() - for event_id in sorted_topologically( - events_to_calc_chain_id_for, event_to_auth_chain - ): - existing_chain_id = None - for auth_id in event_to_auth_chain[event_id]: - if event_to_types.get(event_id) == event_to_types.get(auth_id): - existing_chain_id = chain_map[auth_id] - - new_chain_id = None - if existing_chain_id: - # We found a chain ID/sequence number candidate, check its - # not already taken. - if ( - existing_chain_id[0], - existing_chain_id[1] + 1, - ) not in chains_ids_allocated: - row = self.db_pool.simple_select_one_onecol_txn( + missing_auth_chains.update( + e + for e in event_to_auth_chain[auth_id] + if e not in event_to_types + ) + else: + chain_map[auth_id] = (chain_id, sequence_number) + + # Now we check if we have any events where we don't have auth chain, + # this should only be out of band memberships. + for event_id in sorted_topologically(event_to_auth_chain, event_to_auth_chain): + for auth_id in event_to_auth_chain[event_id]: + if ( + auth_id not in chain_map + and auth_id not in events_to_calc_chain_id_for + ): + events_to_calc_chain_id_for.discard(event_id) + + # If this is an event we're trying to persist we added it to + # the list of events to calculate chain IDs for next time + # around. (Otherwise we will have already added it to the + # table). 
+ event = state_events.get(event_id) + if event: + self.db_pool.simple_insert_txn( txn, - table="event_auth_chains", - keyvalues={ - "chain_id": existing_chain_id[0], - "sequence_number": existing_chain_id[1] + 1, + table="event_auth_chain_to_calculate", + values={ + "event_id": event.event_id, + "room_id": event.room_id, + "type": event.type, + "state_key": event.state_key, }, - retcol="event_id", - allow_none=True, ) - if row: - chains_ids_allocated.add( - (existing_chain_id[0], existing_chain_id[1] + 1,) - ) - else: - new_chain_id = ( - existing_chain_id[0], - existing_chain_id[1] + 1, - ) + break - if not new_chain_id: - new_chain_id = (self._event_chain_id_gen.get_next_id_txn(txn), 1) + if not events_to_calc_chain_id_for: + return - chains_ids_allocated.add(new_chain_id) + # We now calculate the chain IDs/sequence numbers for the events. We + # do this by looking at the chain ID and sequence number of any auth + # event with the same type/state_key and incrementing the sequence + # number by one. If there was no match or the chain ID/sequence + # number is already taken we generate a new chain. + # + # We need to do this in a topologically sorted order as we want to + # generate chain IDs/sequence numbers of an event's auth events + # before the event itself. + chains_ids_allocated = set() # type: Set[Tuple[int, int]] + new_chains = {} # type: Dict[str, Tuple[int, int]] + for event_id in sorted_topologically( + events_to_calc_chain_id_for, event_to_auth_chain + ): + existing_chain_id = None + for auth_id in event_to_auth_chain[event_id]: + if event_to_types.get(event_id) == event_to_types.get(auth_id): + existing_chain_id = chain_map[auth_id] + + new_chain_id = None + if existing_chain_id: + # We found a chain ID/sequence number candidate, check its + # not already taken. + proposed_new_id = existing_chain_id[0] + proposed_new_seq = existing_chain_id[1] + 1 + if (proposed_new_id, proposed_new_seq) not in chains_ids_allocated: + already_allocated = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_auth_chains", + keyvalues={ + "chain_id": proposed_new_id, + "sequence_number": proposed_new_seq, + }, + retcol="event_id", + allow_none=True, + ) + if already_allocated: + # Mark it as already allocated so we don't need to hit + # the DB again. + chains_ids_allocated.add((proposed_new_id, proposed_new_seq)) + else: + new_chain_id = ( + proposed_new_id, + proposed_new_seq, + ) - chain_map[event_id] = new_chain_id - new_chains[event_id] = new_chain_id + if not new_chain_id: + new_chain_id = (self._event_chain_id_gen.get_next_id_txn(txn), 1) - self.db_pool.simple_insert_many_txn( - txn, - table="event_auth_chains", - values=[ - {"event_id": event_id, "chain_id": c_id, "sequence_number": seq} - for event_id, (c_id, seq) in new_chains.items() - ], - ) + chains_ids_allocated.add(new_chain_id) - self.db_pool.simple_delete_many_txn( - txn, - table="event_auth_chain_to_calculate", - keyvalues={}, - column="event_id", - iterable=new_chains, - ) + chain_map[event_id] = new_chain_id + new_chains[event_id] = new_chain_id - # Now we need to calculate any new links between chains caused by - # the new events. - # - # Links are pairs of chain ID/sequence numbers such that for any - # event A (CA, SA) and any event B (CB, SB), B is in A's auth chain - # if and only if there is at least one link (CA, S1) -> (CB, S2) - # where SA >= S1 and S2 >= SB. - # - # We try and avoid adding redundant links to the table, e.g. 
if we - # have two links between two chains which both start/end at the - # sequence number event (or cross) then one can be safely dropped. - # - # To calculate new links we look at every new event and: - # 1. Fetch the chain ID/sequence numbers of its auth events, - # discarding any that are reachable by other auth events, or - # that have the same chain ID as the event. - # 2. For each retained auth event we: - # a. Add a link from the event's to the auth event's chain - # ID/sequence number; and - # b. Add a link from the event to every chain reachable by the - # auth event. - - # Step 1, fetch all existing links - chain_links = _LinkMap() - rows = self.db_pool.simple_select_many_txn( - txn, - table="event_auth_chain_links", - column="origin_chain_id", - iterable={chain_id for chain_id, _ in chain_map.values()}, - keyvalues={}, - retcols=( - "origin_chain_id", - "origin_sequence_number", - "target_chain_id", - "target_sequence_number", - ), + self.db_pool.simple_insert_many_txn( + txn, + table="event_auth_chains", + values=[ + {"event_id": event_id, "chain_id": c_id, "sequence_number": seq} + for event_id, (c_id, seq) in new_chains.items() + ], + ) + + self.db_pool.simple_delete_many_txn( + txn, + table="event_auth_chain_to_calculate", + keyvalues={}, + column="event_id", + iterable=new_chains, + ) + + # Now we need to calculate any new links between chains caused by + # the new events. + # + # Links are pairs of chain ID/sequence numbers such that for any + # event A (CA, SA) and any event B (CB, SB), B is in A's auth chain + # if and only if there is at least one link (CA, S1) -> (CB, S2) + # where SA >= S1 and S2 >= SB. + # + # We try and avoid adding redundant links to the table, e.g. if we + # have two links between two chains which both start/end at the + # sequence number event (or cross) then one can be safely dropped. + # + # To calculate new links we look at every new event and: + # 1. Fetch the chain ID/sequence numbers of its auth events, + # discarding any that are reachable by other auth events, or + # that have the same chain ID as the event. + # 2. For each retained auth event we: + # a. Add a link from the event's to the auth event's chain + # ID/sequence number; and + # b. Add a link from the event to every chain reachable by the + # auth event. + + # Step 1, fetch all existing links + chain_links = _LinkMap() + rows = self.db_pool.simple_select_many_txn( + txn, + table="event_auth_chain_links", + column="origin_chain_id", + iterable={chain_id for chain_id, _ in chain_map.values()}, + keyvalues={}, + retcols=( + "origin_chain_id", + "origin_sequence_number", + "target_chain_id", + "target_sequence_number", + ), + ) + for row in rows: + chain_links.add_link( + row["origin_chain_id"], + row["origin_sequence_number"], + row["target_chain_id"], + row["target_sequence_number"], + new=False, ) - for row in rows: - chain_links.add_link( - row["origin_chain_id"], - row["origin_sequence_number"], - row["target_chain_id"], - row["target_sequence_number"], - new=False, - ) - # We do this in toplogical order to avoid adding redundant links. - for event_id in sorted_topologically( - events_to_calc_chain_id_for, event_to_auth_chain + # We do this in toplogical order to avoid adding redundant links. + for event_id in sorted_topologically( + events_to_calc_chain_id_for, event_to_auth_chain + ): + chain_id, sequence_number = chain_map[event_id] + + # Filter out auth events that are reachable by other auth + # events. 
We do this by looking at every permutation of pairs of + # auth events (A, B) to check if B is reachable from A. + reduction = { + a_id + for a_id in event_to_auth_chain[event_id] + if chain_map[a_id][0] != chain_id + } + for start_auth_id, end_auth_id in itertools.permutations( + event_to_auth_chain[event_id], r=2, ): - chain_id, sequence_number = chain_map[event_id] - - # Filter out auth events that are reachable by other auth - # events. We do this by looking at every permutation of pairs of - # auth events (A, B) to check if B is reachable from A. - reduction = { - a_id - for a_id in event_to_auth_chain[event_id] - if chain_map[a_id][0] != chain_id - } - for start_auth_id, end_auth_id in itertools.permutations( - event_to_auth_chain[event_id], r=2, + if chain_links.exists_path_from( + *chain_map[start_auth_id], *chain_map[end_auth_id] ): - if chain_links.exists_path_from( - *chain_map[start_auth_id], *chain_map[end_auth_id] - ): - reduction.discard(end_auth_id) + reduction.discard(end_auth_id) + + # Step 2, figure out what the new links are from the reduced + # list of auth events. + for auth_id in reduction: + auth_chain_id, auth_sequence_number = chain_map[auth_id] - # Step 2, figure out what the new links are from the reduced - # list of auth events. - for auth_id in reduction: - auth_chain_id, auth_sequence_number = chain_map[auth_id] + # Step 2a, add link between the event and auth event + chain_links.add_link( + chain_id, sequence_number, auth_chain_id, auth_sequence_number + ) - # Step 2a, add link between the event and auth event + # Step 2b, add a link to chains reachable from the auth + # event. + for target_id, target_seq in chain_links.get_links_from( + auth_chain_id, auth_sequence_number + ): chain_links.add_link( - chain_id, sequence_number, auth_chain_id, auth_sequence_number + chain_id, sequence_number, target_id, target_seq ) - # Step 2b, add a link to chains reachable from the auth - # event. 
- for target_id, target_seq in chain_links.get_links_from( - auth_chain_id, auth_sequence_number - ): - chain_links.add_link( - chain_id, sequence_number, target_id, target_seq - ) - - self.db_pool.simple_insert_many_txn( - txn, - table="event_auth_chain_links", - values=[ - { - "origin_chain_id": source_id, - "origin_sequence_number": source_seq, - "target_chain_id": target_id, - "target_sequence_number": target_seq, - } - for ( - source_id, - source_seq, - target_id, - target_seq, - ) in chain_links.get_additions() - ], - ) + self.db_pool.simple_insert_many_txn( + txn, + table="event_auth_chain_links", + values=[ + { + "origin_chain_id": source_id, + "origin_sequence_number": source_seq, + "target_chain_id": target_id, + "target_sequence_number": target_seq, + } + for ( + source_id, + source_seq, + target_id, + target_seq, + ) in chain_links.get_additions() + ], + ) def _persist_transaction_ids_txn( self, From 3e98fb7a0a7dce1a0d8bf99ac1ed890ae128ed40 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Dec 2020 15:01:57 +0000 Subject: [PATCH 12/44] More fixups --- synapse/storage/databases/main/events.py | 2 +- synapse/storage/persist_events.py | 5 ----- synapse/util/iterutils.py | 5 +++++ 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b49d003f735e..40257163c66b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -444,7 +444,7 @@ def _persist_event_auth_chain_txn( txn, table="rooms", column="room_id", - iterable={event.room_id for event in events}, + iterable={event.room_id for event in events if event.is_state()}, keyvalues={}, retcols=("room_id", "has_auth_chain_index"), ) diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 00d1468dab1f..70e636b0bac0 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -388,8 +388,6 @@ async def _persist_events( (event, context) ) - logger.info("Persisting event: %s", event) - for room_id, ev_ctx_rm in events_by_room.items(): latest_event_ids = await self.main_store.get_latest_event_ids_in_room( room_id @@ -403,9 +401,6 @@ async def _persist_events( # No change in extremities, so no change in state continue - logger.info("Old extrem: %s", latest_event_ids) - logger.info("New extrem: %s", new_latest_event_ids) - # there should always be at least one forward extremity. # (except during the initial persistence of the send_join # results, in which case there will be no existing diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index e2428016be61..0b21aa56c09f 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -53,6 +53,11 @@ def chunk_seq(iseq: ISeq, maxlen: int) -> Iterable[ISeq]: def sorted_topologically( nodes: Iterable[T], graph: Dict[T, Collection[T]], ) -> Generator[T, None, None]: + """Given a set of nodes and a graph, yield the nodes in toplogical order. + + For example `sorted_topologically([1, 2], {1: [2]})` will yield `2, 1`. 
+ """ + degree_map = {node: 0 for node in nodes} reverse_graph = {} # type: Dict[T, Set[T]] From 9087033be435a479b019cc8995467d3416e62c42 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Dec 2020 15:54:29 +0000 Subject: [PATCH 13/44] Newsfile --- changelog.d/8879.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/8879.misc b/changelog.d/8879.misc index 6f9516b314c5..1a11e3094457 100644 --- a/changelog.d/8879.misc +++ b/changelog.d/8879.misc @@ -1 +1 @@ -Pass `room_id` to `get_auth_chain_difference`. +Improve efficiency of large state resolutions for new rooms. From 21b3ef0e8ba705df819db1a0bed87d43ad3fb269 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Dec 2020 15:57:54 +0000 Subject: [PATCH 14/44] Test both new and old methods --- tests/storage/test_event_federation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index ba1b6606f20d..a14540b9b281 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -17,6 +17,7 @@ from synapse.events import _EventInternalMetadata +from parameterized import parameterized import tests.unittest import tests.utils @@ -117,7 +118,8 @@ def insert_event(txn, i, room_id): r = self.get_success(self.store.get_rooms_with_many_extremities(5, 1, [room1])) self.assertTrue(r == [room2] or r == [room3]) - def test_auth_difference(self): + @parameterized.expand([(True,), (False,)]) + def test_auth_difference(self, use_chain_cover_index: bool): room_id = "@ROOM:local" # The silly auth graph we use to test the auth difference algorithm, @@ -174,7 +176,7 @@ def store_room(txn): "creator": "room_creator_user_id", "is_public": True, "room_version": "6", - "has_auth_chain_index": True, + "has_auth_chain_index": use_chain_cover_index, }, ) From fdaf4dafbd6b87c77bfa47acee1d89b298ec76bf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Dec 2020 15:59:40 +0000 Subject: [PATCH 15/44] Note --- synapse/util/iterutils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index 0b21aa56c09f..cb8d958f5f27 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -58,6 +58,8 @@ def sorted_topologically( For example `sorted_topologically([1, 2], {1: [2]})` will yield `2, 1`. """ + # This is implemented by Kahn's algorithm. + degree_map = {node: 0 for node in nodes} reverse_graph = {} # type: Dict[T, Set[T]] From 7f5ac138c215a42ec28be97d2b48c1ba7ca8aa44 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 4 Dec 2020 16:07:32 +0000 Subject: [PATCH 16/44] isort --- tests/storage/test_event_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index a14540b9b281..3adf405edb27 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -14,10 +14,10 @@ # limitations under the License. 
import attr +from parameterized import parameterized from synapse.events import _EventInternalMetadata -from parameterized import parameterized import tests.unittest import tests.utils From afb7f80367450987048b84a20505ec4b2a837d6f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 7 Dec 2020 14:53:41 +0000 Subject: [PATCH 17/44] Don't add links where start and end chain are the same --- synapse/storage/databases/main/events.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 40257163c66b..3b6dd0856e34 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -745,6 +745,9 @@ def _persist_event_auth_chain_txn( for target_id, target_seq in chain_links.get_links_from( auth_chain_id, auth_sequence_number ): + if target_id == chain_id: + continue + chain_links.add_link( chain_id, sequence_number, target_id, target_seq ) From dec1f74bf3cdc08bc101c06bf81c239c3d8ef562 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 7 Dec 2020 14:55:53 +0000 Subject: [PATCH 18/44] Have exists_path_from handle same chain case correctly --- synapse/storage/databases/main/events.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 3b6dd0856e34..c6046e673417 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1989,6 +1989,9 @@ def exists_path_from( """Checks if there is a path between the source chain ID/sequence and target chain ID/sequence. """ + if src_chain == target_chain: + return target_seq <= src_seq + links = self.get_links_between(src_chain, target_chain) for link_start_seq, link_end_seq in links: if link_start_seq <= src_seq and target_seq <= link_end_seq: From 9279940347e56495be705b50901224563440280d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 7 Dec 2020 14:57:51 +0000 Subject: [PATCH 19/44] Add some tests --- synapse/storage/databases/main/events.py | 10 +- tests/storage/test_event_chain.py | 489 +++++++++++++++++++++++ 2 files changed, 497 insertions(+), 2 deletions(-) create mode 100644 tests/storage/test_event_chain.py diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c6046e673417..33c183f5020c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1901,7 +1901,7 @@ def add_link( target_chain: int, target_seq: int, new=True, - ): + ) -> bool: """Add a new link between two chains, ensuring no redundant links are added. New links should be added in topological order. @@ -1913,9 +1913,14 @@ def add_link( target_seq, new (bool): Whether this is a "new" link, i.e. should it be returned by `get_additions`. + + Returns: + True if a link was added, false if the given link was dropped as redundant """ current_links = self.maps.setdefault(src_chain, {}).setdefault(target_chain, {}) + assert src_chain != target_chain + if new: # Check if the new link is redundant for current_seq_src, current_seq_target in current_links.items(): @@ -1941,11 +1946,12 @@ def add_link( if current_seq_src <= src_seq and target_seq <= current_seq_target: # This new link is redundant, nothing to do. 
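The redundancy check above reduces to one comparison per existing link between the same pair of chains. A toy sketch of just that rule (illustrative; the real logic lives in `_LinkMap.add_link`):

```python
from typing import Dict


def link_is_redundant(
    existing: Dict[int, int], src_seq: int, target_seq: int
) -> bool:
    # `existing` maps source seq -> target seq for one (source chain,
    # target chain) pair. A new link adds nothing if some existing link
    # starts no later and reaches no less far.
    return any(
        cur_src <= src_seq and target_seq <= cur_target
        for cur_src, cur_target in existing.items()
    )


assert link_is_redundant({1: 1}, 4, 1)      # (4 -> 1) is covered by (1 -> 1)
assert not link_is_redundant({1: 1}, 3, 3)  # (3 -> 3) reaches further
```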
- return + return False self.additions.add((src_chain, src_seq, target_chain, target_seq)) current_links[src_seq] = target_seq + return True def get_links_from( self, source_id, src_seq diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py new file mode 100644 index 000000000000..b714ff693d12 --- /dev/null +++ b/tests/storage/test_event_chain.py @@ -0,0 +1,489 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List, Tuple + +from twisted.trial import unittest + +from synapse.api.constants import EventTypes +from synapse.api.room_versions import RoomVersions +from synapse.events import EventBase +from synapse.storage.databases.main.events import _LinkMap + +from tests.unittest import HomeserverTestCase + + +class EventChainStoreTestCase(HomeserverTestCase): + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + + def test_simple(self): + """Test that the example in `docs/auth_chain_difference_algorithm.md` + works. + """ + + event_factory = self.hs.get_event_builder_factory() + bob = "@creator:test" + alice = "@alice:test" + room_id = "!room:test" + + # Ensure that we have a rooms entry so that we generate the chain index. 
+ self.get_success( + self.store.store_room( + room_id=room_id, + room_creator_user_id="", + is_public=True, + room_version=RoomVersions.V6, + ) + ) + + create = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Create, + "state_key": "", + "sender": bob, + "room_id": room_id, + "content": {"tag": "create"}, + }, + ).build(prev_event_ids=[], auth_event_ids=[]) + ) + + bob_join = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": bob, + "sender": bob, + "room_id": room_id, + "content": {"tag": "bob_join"}, + }, + ).build(prev_event_ids=[], auth_event_ids=[create.event_id]) + ) + + power = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.PowerLevels, + "state_key": "", + "sender": bob, + "room_id": room_id, + "content": {"tag": "power"}, + }, + ).build( + prev_event_ids=[], auth_event_ids=[create.event_id, bob_join.event_id], + ) + ) + + alice_invite = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": alice, + "sender": bob, + "room_id": room_id, + "content": {"tag": "alice_invite"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[create.event_id, bob_join.event_id, power.event_id], + ) + ) + + alice_join = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": alice, + "sender": alice, + "room_id": room_id, + "content": {"tag": "alice_join"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[create.event_id, alice_invite.event_id, power.event_id], + ) + ) + + power_2 = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.PowerLevels, + "state_key": "", + "sender": bob, + "room_id": room_id, + "content": {"tag": "power_2"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[create.event_id, bob_join.event_id, power.event_id], + ) + ) + + bob_join_2 = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": bob, + "sender": bob, + "room_id": room_id, + "content": {"tag": "bob_join_2"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[create.event_id, bob_join.event_id, power.event_id], + ) + ) + + alice_join2 = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": alice, + "sender": alice, + "room_id": room_id, + "content": {"tag": "alice_join2"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[ + create.event_id, + alice_join.event_id, + power_2.event_id, + ], + ) + ) + + events = [ + create, + bob_join, + power, + alice_invite, + alice_join, + bob_join_2, + power_2, + alice_join2, + ] + + expected_links = [ + (bob_join, create), + (power, create), + (power, bob_join), + (alice_invite, create), + (alice_invite, power), + (alice_invite, bob_join), + (bob_join_2, power), + (alice_join2, power_2), + ] + + self.persist(events) + chain_map, link_map = self.fetch_chains(events) + + # Check that the expected links and only the expected links have been + # added. 
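For orientation, the chain tuples these events should end up with mirror the worked example in `docs/auth_chain_difference_algorithm.md`. The concrete chain IDs below are illustrative (the allocator simply hands out the next free ID), but the grouping follows from one chain per `(type, state_key)`:

```python
# Illustrative assignment, one chain per (type, state_key):
chain_map = {
    "create": (1, 1),        # the create event
    "bob_join": (2, 1),      # Bob's membership chain
    "bob_join_2": (2, 2),
    "power": (3, 1),         # the power levels chain
    "power_2": (3, 2),
    "alice_invite": (4, 1),  # Alice's membership chain
    "alice_join": (4, 2),
    "alice_join2": (4, 3),
}
```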
+ self.assertEqual(len(expected_links), len(list(link_map.get_additions()))) + + for start, end in expected_links: + start_id, start_seq = chain_map[start.event_id] + end_id, end_seq = chain_map[end.event_id] + + self.assertIn( + (start_seq, end_seq), list(link_map.get_links_between(start_id, end_id)) + ) + + # Test that everything can reach the create event, but the create event + # can't reach anything. + for event in events[1:]: + self.assertTrue( + link_map.exists_path_from( + *chain_map[event.event_id], *chain_map[create.event_id] + ), + ) + + self.assertFalse( + link_map.exists_path_from( + *chain_map[create.event_id], *chain_map[event.event_id], + ), + ) + + def test_out_of_order_events(self): + """Test that we handle persisting events that we don't have the full + auth chain for yet (which should only happen for out of band memberships). + """ + event_factory = self.hs.get_event_builder_factory() + bob = "@creator:test" + alice = "@alice:test" + room_id = "!room:test" + + # Ensure that we have a rooms entry so that we generate the chain index. + self.get_success( + self.store.store_room( + room_id=room_id, + room_creator_user_id="", + is_public=True, + room_version=RoomVersions.V6, + ) + ) + + # First persist the base room. + create = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Create, + "state_key": "", + "sender": bob, + "room_id": room_id, + "content": {"tag": "create"}, + }, + ).build(prev_event_ids=[], auth_event_ids=[]) + ) + + bob_join = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": bob, + "sender": bob, + "room_id": room_id, + "content": {"tag": "bob_join"}, + }, + ).build(prev_event_ids=[], auth_event_ids=[create.event_id]) + ) + + power = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.PowerLevels, + "state_key": "", + "sender": bob, + "room_id": room_id, + "content": {"tag": "power"}, + }, + ).build( + prev_event_ids=[], auth_event_ids=[create.event_id, bob_join.event_id], + ) + ) + + self.persist([create, bob_join, power]) + + # Now persist an invite and a couple of memberships out of order. + alice_invite = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": alice, + "sender": bob, + "room_id": room_id, + "content": {"tag": "alice_invite"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[create.event_id, bob_join.event_id, power.event_id], + ) + ) + + alice_join = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": alice, + "sender": alice, + "room_id": room_id, + "content": {"tag": "alice_join"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[create.event_id, alice_invite.event_id, power.event_id], + ) + ) + + alice_join2 = self.get_success( + event_factory.for_room_version( + RoomVersions.V6, + { + "type": EventTypes.Member, + "state_key": alice, + "sender": alice, + "room_id": room_id, + "content": {"tag": "alice_join2"}, + }, + ).build( + prev_event_ids=[], + auth_event_ids=[create.event_id, alice_join.event_id, power.event_id], + ) + ) + + self.persist([alice_join]) + self.persist([alice_join2]) + self.persist([alice_invite]) + + # The end result should be sane. 
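The out-of-order persists above rely on the `event_auth_chain_to_calculate` table from the earlier diff: an event only receives a chain tuple once all of its auth events have one. As a hedged one-function summary (a hypothetical helper, not the actual method):

```python
from typing import Dict, Iterable, Tuple


def can_allocate_now(
    auth_ids: Iterable[str], chain_map: Dict[str, Tuple[int, int]]
) -> bool:
    # If any auth event still lacks a chain tuple, the event is parked in
    # event_auth_chain_to_calculate and retried once the stragglers arrive.
    return all(auth_id in chain_map for auth_id in auth_ids)


assert not can_allocate_now(["create", "invite"], {"create": (1, 1)})
assert can_allocate_now(["create"], {"create": (1, 1)})
```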
+ events = [create, bob_join, power, alice_invite, alice_join] + + chain_map, link_map = self.fetch_chains(events) + + expected_links = [ + (bob_join, create), + (power, create), + (power, bob_join), + (alice_invite, create), + (alice_invite, power), + (alice_invite, bob_join), + ] + + # Check that the expected links and only the expected links have been + # added. + self.assertEqual(len(expected_links), len(list(link_map.get_additions()))) + + for start, end in expected_links: + start_id, start_seq = chain_map[start.event_id] + end_id, end_seq = chain_map[end.event_id] + + self.assertIn( + (start_seq, end_seq), list(link_map.get_links_between(start_id, end_id)) + ) + + def persist( + self, events: List[EventBase], + ): + """Persist the given events and check that the links generated match + those given. + """ + + persist_events_store = self.hs.get_datastores().persist_events + + def _persist(txn): + # We need to persist the events to the events and state_events + # tables. + persist_events_store._store_event_txn(txn, [(e, {}) for e in events]) + + persist_events_store.db_pool.simple_insert_many_txn( + txn, + table="state_events", + values=[ + { + "event_id": event.event_id, + "room_id": event.room_id, + "type": event.type, + "state_key": event.state_key, + } + for event in events + ], + ) + + # Actually call the function that calculates the auth chain stuff. + persist_events_store._persist_event_auth_chain_txn(txn, events) + + self.get_success( + persist_events_store.db_pool.runInteraction("_persist", _persist,) + ) + + def fetch_chains( + self, events: List[EventBase] + ) -> Tuple[Dict[str, Tuple[int, int]], _LinkMap]: + + # Fetch the map from event ID -> (chain ID, sequence number) + rows = self.get_success( + self.store.db_pool.simple_select_many_batch( + table="event_auth_chains", + column="event_id", + iterable=[e.event_id for e in events], + retcols=("event_id", "chain_id", "sequence_number"), + keyvalues={}, + ) + ) + + chain_map = { + row["event_id"]: (row["chain_id"], row["sequence_number"]) for row in rows + } + + # Fetch all the links and pass them to the _LinkMap. + rows = self.get_success( + self.store.db_pool.simple_select_many_batch( + table="event_auth_chain_links", + column="origin_chain_id", + iterable=[chain_id for chain_id, _ in chain_map.values()], + retcols=( + "origin_chain_id", + "origin_sequence_number", + "target_chain_id", + "target_sequence_number", + ), + keyvalues={}, + ) + ) + + link_map = _LinkMap() + for row in rows: + added = link_map.add_link( + row["origin_chain_id"], + row["origin_sequence_number"], + row["target_chain_id"], + row["target_sequence_number"], + ) + + # We shouldn't have persisted any redundant links + self.assertTrue(added) + + return chain_map, link_map + + +class LinkMapTestCase(unittest.TestCase): + def test_simple(self): + """Basic tests for the LinkMap. + """ + link_map = _LinkMap() + + link_map.add_link(1, 1, 2, 1, new=False) + self.assertYieldsUnordered(link_map.get_links_between(1, 2), [(1, 1)]) + self.assertYieldsUnordered(link_map.get_links_from(1, 1), [(2, 1)]) + self.assertYieldsUnordered(link_map.get_additions(), []) + self.assertTrue(link_map.exists_path_from(1, 5, 2, 1)) + self.assertFalse(link_map.exists_path_from(1, 5, 2, 2)) + self.assertTrue(link_map.exists_path_from(1, 5, 1, 1)) + self.assertFalse(link_map.exists_path_from(1, 1, 1, 5)) + + # Attempting to add a redundant link is ignored. 
+ link_map.add_link(1, 4, 2, 1) + self.assertYieldsUnordered(link_map.get_links_between(1, 2), [(1, 1)]) + + # Adding new non-redundant links works + link_map.add_link(1, 3, 2, 3) + self.assertYieldsUnordered(link_map.get_links_between(1, 2), [(1, 1), (3, 3)]) + + link_map.add_link(2, 5, 1, 3) + self.assertYieldsUnordered(link_map.get_links_between(2, 1), [(5, 3)]) + + self.assertYieldsUnordered( + link_map.get_additions(), [(1, 3, 2, 3), (2, 5, 1, 3)] + ) + + def assertYieldsUnordered(self, left, right): + """Test that the two iterables yield the same values, ignoring order. + """ + self.assertEqual(set(left), set(right)) From 6a74e215a6924628c6f30943d8e182de4df48768 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 7 Dec 2020 16:22:51 +0000 Subject: [PATCH 20/44] Fix unit tests on postgres --- tests/storage/test_event_chain.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index b714ff693d12..415fbef9370d 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -28,6 +28,7 @@ class EventChainStoreTestCase(HomeserverTestCase): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() + self._next_stream_ordering = 1 def test_simple(self): """Test that the example in `docs/auth_chain_difference_algorithm.md` @@ -377,6 +378,10 @@ def persist( persist_events_store = self.hs.get_datastores().persist_events + for e in events: + e.internal_metadata.stream_ordering = self._next_stream_ordering + self._next_stream_ordering += 1 + def _persist(txn): # We need to persist the events to the events and state_events # tables. From 654eff1e509768d7dffcded68d5b4c74e8ed9f21 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 7 Dec 2020 16:40:44 +0000 Subject: [PATCH 21/44] Add missing 'auth' --- docs/auth_chain_difference_algorithm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index 79e16afb8bbe..590f2c934e4b 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -62,7 +62,7 @@ state sets consisting of: 1. `S1`: Alice's invite `(4,1)` and Bob's second join `(2,2)`; and 2. `S2`: Alice's second join `(4,3)` and Bob's first join `(2,1)`. -Using the index we see that the following chains are reachable from each: +Using the index we see that the following auth chains are reachable from each: 1. `S1`: `(1,1)`, `(2,2)`, `(3,1)` & `(4,1)` 2. `S2`: `(1,1)`, `(2,1)`, `(3,2)` & `(4,3)` From dbecefd89924975ea18757f7ed30f237af29af66 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 10:43:42 +0000 Subject: [PATCH 22/44] Fixup typing for execute_values --- synapse/storage/database.py | 15 +++++++++------ .../storage/databases/main/event_federation.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 145dee524ffd..e5cd9c055db5 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -180,6 +180,9 @@ def __getattr__(self, name): _CallbackListEntry = Tuple["Callable[..., None]", Iterable[Any], Dict[str, Any]] +R = TypeVar("R") + + class LoggingTransaction: """An object that almost-transparently proxies for the 'txn' object passed to the constructor. 
Adds logging and metrics to the .execute() @@ -267,15 +270,18 @@ def execute_batch(self, sql: str, args: Iterable[Iterable[Any]]) -> None: for val in args: self.execute(sql, val) - def execute_values(self, sql: str, *args: Any, **kwargs) -> None: + def execute_values(self, sql: str, *args: Any) -> List[Tuple]: """Corresponds to psycopg2.extras.execute_values. Only available when using postgres. + + Always sets fetch=True when caling `execute_values`, so will return the + results. """ assert isinstance(self.database_engine, PostgresEngine) from psycopg2.extras import execute_values # type: ignore return self._do_execute( - lambda *x: execute_values(self.txn, *x, **kwargs), sql, *args + lambda *x: execute_values(self.txn, *x, fetch=True), sql, *args ) def execute(self, sql: str, *args: Any) -> None: @@ -288,7 +294,7 @@ def _make_sql_one_line(self, sql: str) -> str: "Strip newlines out of SQL so that the loggers in the DB are on one line" return " ".join(line.strip() for line in sql.splitlines() if line.strip()) - def _do_execute(self, func, sql: str, *args: Any) -> None: + def _do_execute(self, func: Callable[..., R], sql: str, *args: Any) -> R: sql = self._make_sql_one_line(sql) # TODO(paul): Maybe use 'info' and 'debug' for values? @@ -359,9 +365,6 @@ def interval(self, interval_duration_secs: float, limit: int = 3) -> str: return top_n_counters -R = TypeVar("R") - - class DatabasePool: """Wraps a single physical database and connection pool. diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 68c52a3bebcf..ef7009a4dc66 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -304,7 +304,7 @@ def _get_auth_chain_difference_using_chains_txn( for chain_id, (min_no, max_no) in chain_to_gap.items() ] - rows = txn.execute_values(sql, args, fetch=True) + rows = txn.execute_values(sql, args) result.update(r for r, in rows) else: # For SQLite we just fall back to doing a noddy for loop. From 123b4316d16b12d230ddce4cb9a5b861b5c9b3a7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 10:45:24 +0000 Subject: [PATCH 23/44] Rename _get_auth_chain_difference_using_chains_txn and add comment --- synapse/storage/databases/main/event_federation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ef7009a4dc66..87c4d8e8360a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -158,7 +158,7 @@ async def get_auth_chain_difference( if room["has_auth_chain_index"]: return await self.db_pool.runInteraction( "get_auth_chain_difference_chains", - self._get_auth_chain_difference_using_chains_txn, + self._get_auth_chain_difference_using_cover_index_txn, state_sets, ) else: @@ -168,7 +168,7 @@ async def get_auth_chain_difference( state_sets, ) - def _get_auth_chain_difference_using_chains_txn( + def _get_auth_chain_difference_using_cover_index_txn( self, txn, state_sets: List[Set[str]] ) -> Set[str]: """Calculates the auth chain difference using the chain index. @@ -321,6 +321,10 @@ def _get_auth_chain_difference_using_chains_txn( def _get_auth_chain_difference_txn( self, txn, state_sets: List[Set[str]] ) -> Set[str]: + """Calculates the auth chain difference using a breadth first search. + + This is used when we don't have a cover index for the room. 
+ """ # Algorithm Description # ~~~~~~~~~~~~~~~~~~~~~ From 883e922019f4a43e7fb45c64470d4654acf28027 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 14:40:16 +0000 Subject: [PATCH 24/44] Add some definitions --- docs/auth_chain_difference_algorithm.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index 590f2c934e4b..759d1d3a00d2 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -3,7 +3,16 @@ The auth chain difference algorithm is used by V2 state resolution, where a naive implementation can be a significant source of CPU and DB usage. -The auth chain difference of a set of state sets is the union minus the +### Definitions + +A *state set* is a set of state events; e.g. the input of a state resolution +algorithm is a collection of state sets. + +The *auth chain* of a set of events are all the events' auth events and *their* +auth events, recursively (i.e. the events reachable by walking the graph induced +by an event's auth events links). + +The *auth chain difference* of a collection of state sets is the union minus the intersection of the sets of auth chains corresponding to the state sets, i.e an event is in the auth chain difference if it is reachable by walking the auth event graph from at least one of the state sets but not from *all* of the state From 988f25adce0d481cfaf8ad6bb3b51c447f0f6eeb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 14:45:00 +0000 Subject: [PATCH 25/44] Fixup link confusion --- docs/auth_chain_difference_algorithm.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index 759d1d3a00d2..ba1a8a34bd62 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -23,13 +23,14 @@ sets. Synapse computes auth chain differences by pre-computing a "chain cover" index for the auth chain in a room, allowing efficient reachability queries like "is event A in the auth chain of event B". This is done by assigning every event a -*chain ID* and *sequence number* and having map of *links* such that A is -reachable by B (i.e. `A` is in the auth chain of `B`) if and only if either: +*chain ID* and *sequence number* (e.g. `(5,3)`), and having a map of *links* +between chains (e.g. `(5,3) -> (2,4)`) such that A is reachable by B (i.e. `A` +is in the auth chain of `B`) if and only if either: 1. A and B have the same chain ID and `A`'s sequence number is less than `B`'s sequence number; or 2. there is a link `L` between `B`'s chain ID and `A`'s chain ID such that - `L.seq_no` <= `B.seq_no` and `A.seq_no` <= `L.seq_no`. + `L.start_seq_no` <= `B.seq_no` and `A.seq_no` <= `L.end_seq_no`. 
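Those two conditions translate almost directly into code. A minimal sketch, assuming links are kept per `(origin chain, target chain)` pair as a map of `start_seq_no -> end_seq_no` (this mirrors, but is not, Synapse's `_LinkMap`):

```python
from typing import Dict, Tuple

Links = Dict[Tuple[int, int], Dict[int, int]]


def is_in_auth_chain(a: Tuple[int, int], b: Tuple[int, int], links: Links) -> bool:
    a_chain, a_seq = a
    b_chain, b_seq = b
    if a_chain == b_chain:
        # Condition 1: earlier in the same chain.
        return a_seq < b_seq
    # Condition 2: some link from B's chain to A's chain covers the pair.
    return any(
        start_seq <= b_seq and a_seq <= end_seq
        for start_seq, end_seq in links.get((b_chain, a_chain), {}).items()
    )


# One link (2, 2) -> (1, 3): anything at or after (2, 2) in chain 2 reaches
# anything at or before (1, 3) in chain 1.
links = {(2, 1): {2: 3}}
assert is_in_auth_chain((1, 3), (2, 2), links)
assert not is_in_auth_chain((1, 4), (2, 2), links)
```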
There are actually two variants, one where we store links from each chain to every other reachable chain (the transitive closure of the links graph), and one From 08ec78ba77a5f3751fd88c4fad9f74b6b48128c6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 14:51:22 +0000 Subject: [PATCH 26/44] Make para less dense (hopefully) --- docs/auth_chain_difference_algorithm.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index ba1a8a34bd62..cbd161cb259d 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -48,9 +48,10 @@ are those that would be remove in the second variant described above). ![Example](auth_chain_diff.dot.png) -Note that we don't add links between every event and its auth events, as that is -redundant (under both variants), e.g. all events point to the create event, but -each chain only needs the one link from it's base to the create event. +Note that we don't include all links between events and their auth events, as +most of those links would be redundant. For example, all events point to the +create event, but each chain only needs the one link from it's base to the +create event. ## Using the Index From 024c80285f03cdc00cf53751d49522035f8c7154 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 14:56:28 +0000 Subject: [PATCH 27/44] Add note about auth chain --- docs/auth_chain_difference_algorithm.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index cbd161cb259d..67e815c551bd 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -65,6 +65,10 @@ by looking at the chain ID and sequence numbers reachable from each state set: numbers between the maximum sequence number reachable from *any* state set and the minimum reachable by *all* state sets (if any). +Note that steps 2 is effectively calculating the auth chain for each state set +(in terms of chain IDs and sequence numbers), and step 3 is calculating the +difference between the union and intersection of the auth chains. + ### Worked Examplee For example, if we take the above graph and try and get the difference between From 4cc769fa95ccd8b13e88eb3777151acd2b749caa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 14:58:55 +0000 Subject: [PATCH 28/44] Be explicit --- docs/auth_chain_difference_algorithm.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index 67e815c551bd..b6e56ab50aa7 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -69,7 +69,7 @@ Note that steps 2 is effectively calculating the auth chain for each state set (in terms of chain IDs and sequence numbers), and step 3 is calculating the difference between the union and intersection of the auth chains. -### Worked Examplee +### Worked Example For example, if we take the above graph and try and get the difference between state sets consisting of: @@ -77,7 +77,9 @@ state sets consisting of: 1. `S1`: Alice's invite `(4,1)` and Bob's second join `(2,2)`; and 2. `S2`: Alice's second join `(4,3)` and Bob's first join `(2,1)`. 
-Using the index we see that the following auth chains are reachable from each: +Using the index we see that the following auth chains are reachable from each +state set: + 1. `S1`: `(1,1)`, `(2,2)`, `(3,1)` & `(4,1)` 2. `S2`: `(1,1)`, `(2,1)`, `(3,2)` & `(4,3)` @@ -89,5 +91,5 @@ And so, for each the ranges that are in the auth chain difference: level). 4. Chain 4: The range `(1, 3]` (corresponding to both of Alice's joins). -So the final result is: Bob's second join, the second power level and both of -Alice's joins. +So the final result is: Bob's second join `(2,2)`, the second power level +`(3,2)` and both of Alice's joins `(4,2)` & `(4,3)`. From 7d75efbd6c9dcae67bd8184674c30a65a9000c3d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 14:59:48 +0000 Subject: [PATCH 29/44] rm variant --- docs/auth_chain_difference_algorithm.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index b6e56ab50aa7..81006d7697fd 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -32,19 +32,19 @@ is in the auth chain of `B`) if and only if either: 2. there is a link `L` between `B`'s chain ID and `A`'s chain ID such that `L.start_seq_no` <= `B.seq_no` and `A.seq_no` <= `L.end_seq_no`. -There are actually two variants, one where we store links from each chain to -every other reachable chain (the transitive closure of the links graph), and one -where we remove redundant links (the transitive reduction of the links graph) -e.g. if we have chains `C3 -> C2 -> C1` then the link `C3 -> C1` would not be -stored. Synapse uses the former variant so that it doesn't need to recurse to -test reachability between chains. +There are actually two potential implementations, one where we store links from +each chain to every other reachable chain (the transitive closure of the links +graph), and one where we remove redundant links (the transitive reduction of the +links graph) e.g. if we have chains `C3 -> C2 -> C1` then the link `C3 -> C1` +would not be stored. Synapse uses the former implementations so that it doesn't +need to recurse to test reachability between chains. ### Example An example auth graph would look like the following, where chains have been formed based on type/state_key and are denoted by colour and are labelled with `(chain ID, sequence number)`. Links are denoted by the arrows (links in grey -are those that would be remove in the second variant described above). +are those that would be remove in the second implementation described above). ![Example](auth_chain_diff.dot.png) From 92b5e4b157e307c95f9e1e65030248ecb0f00bbb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 15:17:42 +0000 Subject: [PATCH 30/44] Add note about current algo --- docs/auth_chain_difference_algorithm.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index 81006d7697fd..da748e5157bc 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -18,6 +18,19 @@ event is in the auth chain difference if it is reachable by walking the auth event graph from at least one of the state sets but not from *all* of the state sets. 
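Putting the definition together with the index, the per-chain arithmetic of steps 2 and 3 is tiny. An illustrative sketch on the worked example above (the real implementation must additionally fetch the events for each gap from the database):

```python
# Maximum sequence number reachable in each chain, per state set, as in
# the worked example: S1 and S2.
s1 = {1: 1, 2: 2, 3: 1, 4: 1}
s2 = {1: 1, 2: 1, 3: 2, 4: 3}
state_sets = [s1, s2]

difference = {}
for chain_id in {c for s in state_sets for c in s}:
    low = min(s.get(chain_id, 0) for s in state_sets)   # reachable by all
    high = max(s.get(chain_id, 0) for s in state_sets)  # reachable by some
    if low < high:
        difference[chain_id] = (low + 1, high)  # inclusive range of seq nos

# Bob's second join, the second power level, and both of Alice's joins.
assert difference == {2: (2, 2), 3: (2, 2), 4: (2, 3)}
```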
+## Breadth First Walk Algorithm + +A way of calculating the auth chain difference without calculating the full auth +chains for each state set is to do a parallel breadth first walk (ordered by +depth) of each state set's auth chain. By tracking which events are reachable +from each state set we can finish early if every pending event is reachable from +every state set. + +This can work well for state sets that have a small auth chain difference, but +can be very inefficient for larger differences. However, this algorithm is still +used if we don't have a chain cover index for the room (e.g. because we're in +the process of indexing it). + ## Chain Cover Index Synapse computes auth chain differences by pre-computing a "chain cover" index From a9552c2a6e18e84e50892fb9a29f717aeac01f90 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 15:19:49 +0000 Subject: [PATCH 31/44] Update docs/auth_chain_difference_algorithm.md Co-authored-by: Patrick Cloke --- docs/auth_chain_difference_algorithm.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/auth_chain_difference_algorithm.md b/docs/auth_chain_difference_algorithm.md index da748e5157bc..30f72a70dae1 100644 --- a/docs/auth_chain_difference_algorithm.md +++ b/docs/auth_chain_difference_algorithm.md @@ -84,7 +84,7 @@ difference between the union and intersection of the auth chains. ### Worked Example -For example, if we take the above graph and try and get the difference between +For example, given the above graph, we can calculate the difference between state sets consisting of: 1. `S1`: Alice's invite `(4,1)` and Bob's second join `(2,2)`; and From 7cc6d7efbb237441ae1c1fdf736b388cdcf6f9f4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 15:27:38 +0000 Subject: [PATCH 32/44] Fix up _LinkMap --- synapse/storage/databases/main/events.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 33c183f5020c..82cf1c682a32 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1886,12 +1886,17 @@ def _update_backward_extremeties(self, txn, events): ) -@attr.s +@attr.s(slots=True) class _LinkMap: """A helper type for tracking links between chains. """ + # Stores the set of links as nested maps: source chain ID -> target chain ID + # -> source sequence number -> target sequence number. maps = attr.ib(type=Dict[int, Dict[int, Dict[int, int]]], factory=dict) + + # Stores the links that have been added (with new set to true), as tuples of + # `(source chain ID, source sequence no, target chain ID, target sequence no.)` additions = attr.ib(type=Set[Tuple[int, int, int, int]], factory=set) def add_link( @@ -1900,18 +1905,18 @@ def add_link( src_seq: int, target_chain: int, target_seq: int, - new=True, + new: bool = True, ) -> bool: """Add a new link between two chains, ensuring no redundant links are added. New links should be added in topological order. Args: - src_chain, - src_seq, - target_chain, - target_seq, - new (bool): Whether this is a "new" link, i.e. should it be returned + src_chain: The chain ID of the source of the link, + src_seq: The sequence number of the source of the link, + target_chain: The chain ID of the target of the link, + target_seq: The sequence number of the target of the link, + new: Whether this is a "new" link, i.e. should it be returned by `get_additions`. 
Returns: @@ -1954,7 +1959,7 @@ def add_link( return True def get_links_from( - self, source_id, src_seq + self, source_id: int, src_seq: int, ) -> Generator[Tuple[int, int], None, None]: """Gets the chains reachable from the given chain/sequence number. From 5fa05f2dd703c14ce2b9d1aea508d6943817b003 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 8 Dec 2020 15:32:46 +0000 Subject: [PATCH 33/44] Fix up event_chain tests --- tests/storage/test_event_chain.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index 415fbef9370d..fcd4b833e9f5 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -465,30 +465,24 @@ def test_simple(self): link_map = _LinkMap() link_map.add_link(1, 1, 2, 1, new=False) - self.assertYieldsUnordered(link_map.get_links_between(1, 2), [(1, 1)]) - self.assertYieldsUnordered(link_map.get_links_from(1, 1), [(2, 1)]) - self.assertYieldsUnordered(link_map.get_additions(), []) + self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1)]) + self.assertCountEqual(link_map.get_links_from(1, 1), [(2, 1)]) + self.assertCountEqual(link_map.get_additions(), []) self.assertTrue(link_map.exists_path_from(1, 5, 2, 1)) self.assertFalse(link_map.exists_path_from(1, 5, 2, 2)) self.assertTrue(link_map.exists_path_from(1, 5, 1, 1)) self.assertFalse(link_map.exists_path_from(1, 1, 1, 5)) # Attempting to add a redundant link is ignored. - link_map.add_link(1, 4, 2, 1) - self.assertYieldsUnordered(link_map.get_links_between(1, 2), [(1, 1)]) + self.assertFalse(link_map.add_link(1, 4, 2, 1)) + self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1)]) # Adding new non-redundant links works - link_map.add_link(1, 3, 2, 3) - self.assertYieldsUnordered(link_map.get_links_between(1, 2), [(1, 1), (3, 3)]) + self.assertTrue(link_map.add_link(1, 3, 2, 3)) + self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1), (3, 3)]) - link_map.add_link(2, 5, 1, 3) - self.assertYieldsUnordered(link_map.get_links_between(2, 1), [(5, 3)]) + self.assertTrue(link_map.add_link(2, 5, 1, 3)) + self.assertCountEqual(link_map.get_links_between(2, 1), [(5, 3)]) + self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1), (3, 3)]) - self.assertYieldsUnordered( - link_map.get_additions(), [(1, 3, 2, 3), (2, 5, 1, 3)] - ) - - def assertYieldsUnordered(self, left, right): - """Test that the two iterables yield the same values, ignoring order. - """ - self.assertEqual(set(left), set(right)) + self.assertCountEqual(link_map.get_additions(), [(1, 3, 2, 3), (2, 5, 1, 3)]) From e3d0be43638dbcb1411954ee2eb0d72eaec987d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 9 Dec 2020 14:20:11 +0000 Subject: [PATCH 34/44] Make sorted_topologically stable and add tests --- synapse/util/iterutils.py | 21 ++++++++++++++---- tests/util/test_itertools.py | 41 +++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index cb8d958f5f27..f7b4857a8464 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -13,8 +13,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import heapq from itertools import islice -from typing import Dict, Generator, Iterable, Iterator, Sequence, Set, Tuple, TypeVar +from typing import ( + Dict, + Generator, + Iterable, + Iterator, + Mapping, + Sequence, + Set, + Tuple, + TypeVar, +) from synapse.types import Collection @@ -51,7 +62,7 @@ def chunk_seq(iseq: ISeq, maxlen: int) -> Iterable[ISeq]: def sorted_topologically( - nodes: Iterable[T], graph: Dict[T, Collection[T]], + nodes: Iterable[T], graph: Mapping[T, Collection[T]], ) -> Generator[T, None, None]: """Given a set of nodes and a graph, yield the nodes in toplogical order. @@ -75,12 +86,14 @@ def sorted_topologically( reverse_graph.setdefault(node, set()) zero_degree = [node for node, degree in degree_map.items() if degree == 0] + heapq.heapify(zero_degree) + while zero_degree: - node = zero_degree.pop() + node = heapq.heappop(zero_degree) yield node for edge in reverse_graph[node]: if edge in degree_map: degree_map[edge] -= 1 if degree_map[edge] == 0: - zero_degree.append(edge) + heapq.heappush(zero_degree, edge) diff --git a/tests/util/test_itertools.py b/tests/util/test_itertools.py index 0ab0a914836a..1184cea5a33f 100644 --- a/tests/util/test_itertools.py +++ b/tests/util/test_itertools.py @@ -12,7 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util.iterutils import chunk_seq +from typing import Dict, List + +from synapse.util.iterutils import chunk_seq, sorted_topologically from tests.unittest import TestCase @@ -45,3 +47,40 @@ def test_empty_input(self): self.assertEqual( list(parts), [], ) + + +class SortTopologically(TestCase): + def test_empty(self): + "Test that an empty graph works correctly" + + graph = {} # type: Dict[int, List[int]] + self.assertEqual(list(sorted_topologically([], graph)), []) + + def test_disconnected(self): + "Test that a graph with no edges work" + + graph = {1: [], 2: []} # type: Dict[int, List[int]] + + # For disconnected nodes the output is simply sorted. + self.assertEqual(list(sorted_topologically([1, 2], graph)), [1, 2]) + + def test_linear(self): + "Test that a simple `4 -> 3 -> 2 -> 1` graph works" + + graph = {1: [], 2: [1], 3: [2], 4: [3]} # type: Dict[int, List[int]] + + self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4]) + + def test_subset(self): + "Test that only sorting a subset of the graph works" + graph = {1: [], 2: [1], 3: [2], 4: [3]} # type: Dict[int, List[int]] + + self.assertEqual(list(sorted_topologically([4, 3], graph)), [3, 4]) + + def test_fork(self): + "Test that a forked graph works" + graph = {1: [], 2: [1], 3: [1], 4: [2, 3]} # type: Dict[int, List[int]] + + # Valid orderings are `[1, 3, 2, 4]` or `[1, 2, 3, 4]`, but we should + # always get the same one. 
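A brief usage note on what the heap buys: ties between ready nodes are now broken by sorting, so the output order is deterministic across runs. In the persistence path this means events are always handed chain tuples in the same parents-first order. For example (assuming the function from the diff above is importable):

```python
from synapse.util.iterutils import sorted_topologically

# "create" has no auth events; the two joins tie, and the heap emits them
# in sorted order every time.
auth_graph = {"create": [], "join_a": ["create"], "join_b": ["create"]}
order = list(sorted_topologically(["join_b", "join_a", "create"], auth_graph))
assert order == ["create", "join_a", "join_b"]
```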
+ self.assertEqual(list(sorted_topologically([4, 3, 2, 1], graph)), [1, 2, 3, 4]) From cdb88c24ff60adab343576cc37f61f9303adca30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 9 Dec 2020 15:03:59 +0000 Subject: [PATCH 35/44] Make _LinkMap use tuples --- synapse/storage/databases/main/events.py | 98 ++++++++++++++---------- tests/storage/test_event_chain.py | 28 ++++--- 2 files changed, 69 insertions(+), 57 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 82cf1c682a32..5156de9b5213 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -409,7 +409,11 @@ def _persist_events_txn( def _persist_event_auth_chain_txn( self, txn: LoggingTransaction, events: List[EventBase], - ): + ) -> None: + + # We only care about state events, so this if there are no state events. + if not any(e.is_state() for e in events): + return # We want to store event_auth mappings for rejected events, as they're # used in state res v2. @@ -439,7 +443,8 @@ def _persist_event_auth_chain_txn( # # See: docs/auth_chain_difference_algorithm.md - # We ignore rooms that we aren't filling the chain cover index for. + # We ignore legacy rooms that we aren't filling the chain cover index + # for. rows = self.db_pool.simple_select_many_txn( txn, table="rooms", @@ -448,14 +453,14 @@ def _persist_event_auth_chain_txn( keyvalues={}, retcols=("room_id", "has_auth_chain_index"), ) - room_to_is_using_chain_index = { - row["room_id"]: bool(row["has_auth_chain_index"]) for row in rows + rooms_using_chain_index = { + row["room_id"] for row in rows if row["has_auth_chain_index"] } state_events = { event.event_id: event for event in events - if event.is_state() and room_to_is_using_chain_index[event.room_id] + if event.is_state() and event.room_id in rooms_using_chain_index } if not state_events: @@ -478,9 +483,9 @@ def _persist_event_auth_chain_txn( # Set of event IDs to calculate chain ID/seq numbers for. events_to_calc_chain_id_for = set(state_events) - # We check if there are any events for the room that need to be handled. - # These should just be out of band memberships, where we didn't have the - # auth chain when we first persisted. + # We check if there are any events that need to be handled in the rooms + # we're looking at. These should just be out of band memberships, where + # we didn't have the auth chain when we first persisted. rows = self.db_pool.simple_select_many_txn( txn, table="event_auth_chain_to_calculate", @@ -494,6 +499,9 @@ def _persist_event_auth_chain_txn( event_type = row["type"] state_key = row["state_key"] + # (We could pull out the auth events for all rows at once using + # simple_select_many, but this case happens rarely and almost always + # with a single row.) auth_events = self.db_pool.simple_select_onecol_txn( txn, "event_auth", keyvalues={"event_id": event_id}, retcol="auth_id", ) @@ -570,7 +578,7 @@ def _persist_event_auth_chain_txn( ): events_to_calc_chain_id_for.discard(event_id) - # If this is an event we're trying to persist we added it to + # If this is an event we're trying to persist we add it to # the list of events to calculate chain IDs for next time # around. (Otherwise we will have already added it to the # table). @@ -586,6 +594,9 @@ def _persist_event_auth_chain_txn( "state_key": event.state_key, }, ) + + # We stop checking the event's auth events since we've + # discarded it. 
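The discard-and-park logic just described has a subtlety worth spelling out: dropping one event can strand others that list it as an auth event. A hedged sketch of the invariant being maintained (an illustrative fixpoint, not the actual single-pass implementation):

```python
from typing import Dict, List, Set


def calculable_subset(
    pending: Set[str], auth_graph: Dict[str, List[str]], indexed: Set[str]
) -> Set[str]:
    # Keep only events whose every auth event is either already indexed or
    # will itself be calculated in this batch; loop because each discard
    # can strand further events.
    pending = set(pending)
    changed = True
    while changed:
        changed = False
        for event_id in list(pending):
            auths = auth_graph.get(event_id, [])
            if any(a not in indexed and a not in pending for a in auths):
                pending.discard(event_id)
                changed = True
    return pending


# "join" depends on an invite we have never seen, so both it and the event
# built on top of it get parked for later.
graph = {"join": ["missing_invite"], "later_state": ["join"]}
assert calculable_subset({"join", "later_state"}, graph, set()) == set()
```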
break if not events_to_calc_chain_id_for: @@ -600,8 +611,8 @@ def _persist_event_auth_chain_txn( # We need to do this in a topologically sorted order as we want to # generate chain IDs/sequence numbers of an event's auth events # before the event itself. - chains_ids_allocated = set() # type: Set[Tuple[int, int]] - new_chains = {} # type: Dict[str, Tuple[int, int]] + chains_tuples_allocated = set() # type: Set[Tuple[int, int]] + new_chain_tuples = {} # type: Dict[str, Tuple[int, int]] for event_id in sorted_topologically( events_to_calc_chain_id_for, event_to_auth_chain ): @@ -609,14 +620,15 @@ def _persist_event_auth_chain_txn( for auth_id in event_to_auth_chain[event_id]: if event_to_types.get(event_id) == event_to_types.get(auth_id): existing_chain_id = chain_map[auth_id] + break - new_chain_id = None + new_chain_tuple = None if existing_chain_id: # We found a chain ID/sequence number candidate, check its # not already taken. proposed_new_id = existing_chain_id[0] proposed_new_seq = existing_chain_id[1] + 1 - if (proposed_new_id, proposed_new_seq) not in chains_ids_allocated: + if (proposed_new_id, proposed_new_seq) not in chains_tuples_allocated: already_allocated = self.db_pool.simple_select_one_onecol_txn( txn, table="event_auth_chains", @@ -630,27 +642,27 @@ def _persist_event_auth_chain_txn( if already_allocated: # Mark it as already allocated so we don't need to hit # the DB again. - chains_ids_allocated.add((proposed_new_id, proposed_new_seq)) + chains_tuples_allocated.add((proposed_new_id, proposed_new_seq)) else: - new_chain_id = ( + new_chain_tuple = ( proposed_new_id, proposed_new_seq, ) - if not new_chain_id: - new_chain_id = (self._event_chain_id_gen.get_next_id_txn(txn), 1) + if not new_chain_tuple: + new_chain_tuple = (self._event_chain_id_gen.get_next_id_txn(txn), 1) - chains_ids_allocated.add(new_chain_id) + chains_tuples_allocated.add(new_chain_tuple) - chain_map[event_id] = new_chain_id - new_chains[event_id] = new_chain_id + chain_map[event_id] = new_chain_tuple + new_chain_tuples[event_id] = new_chain_tuple self.db_pool.simple_insert_many_txn( txn, table="event_auth_chains", values=[ {"event_id": event_id, "chain_id": c_id, "sequence_number": seq} - for event_id, (c_id, seq) in new_chains.items() + for event_id, (c_id, seq) in new_chain_tuples.items() ], ) @@ -659,7 +671,7 @@ def _persist_event_auth_chain_txn( table="event_auth_chain_to_calculate", keyvalues={}, column="event_id", - iterable=new_chains, + iterable=new_chain_tuples, ) # Now we need to calculate any new links between chains caused by @@ -684,7 +696,8 @@ def _persist_event_auth_chain_txn( # b. Add a link from the event to every chain reachable by the # auth event. - # Step 1, fetch all existing links + # Step 1, fetch all existing links from all the chains we've seen + # referenced. 
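The allocation loop above follows a simple rule: extend the chain of an auth event that shares the event's `(type, state_key)` if that chain's next slot is free, otherwise start a fresh chain. A standalone sketch under those assumptions (`new_chain_ids` stands in for the DB-backed ID generator, and `allocated` for the DB uniqueness check):

```python
import itertools
from typing import Dict, Iterable, Iterator, Set, Tuple


def allocate_chain_tuple(
    auth_ids: Iterable[str],
    type_key: Tuple[str, str],
    event_types: Dict[str, Tuple[str, str]],
    chain_map: Dict[str, Tuple[int, int]],
    allocated: Set[Tuple[int, int]],
    new_chain_ids: Iterator[int],
) -> Tuple[int, int]:
    for auth_id in auth_ids:
        if event_types.get(auth_id) == type_key:
            chain_id, seq = chain_map[auth_id]
            candidate = (chain_id, seq + 1)
            if candidate not in allocated:
                allocated.add(candidate)
                return candidate
            break  # same-typed predecessor found, but its next slot is taken
    candidate = (next(new_chain_ids), 1)
    allocated.add(candidate)
    return candidate


# A second join for the same user extends the existing membership chain.
allocated = set()  # type: Set[Tuple[int, int]]
types = {"join1": ("m.room.member", "@alice:test")}
chains = {"join1": (7, 1)}
assert allocate_chain_tuple(
    ["join1"],
    ("m.room.member", "@alice:test"),
    types,
    chains,
    allocated,
    itertools.count(100),
) == (7, 2)
```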
chain_links = _LinkMap() rows = self.db_pool.simple_select_many_txn( txn, @@ -701,10 +714,8 @@ def _persist_event_auth_chain_txn( ) for row in rows: chain_links.add_link( - row["origin_chain_id"], - row["origin_sequence_number"], - row["target_chain_id"], - row["target_sequence_number"], + (row["origin_chain_id"], row["origin_sequence_number"]), + (row["target_chain_id"], row["target_sequence_number"]), new=False, ) @@ -726,7 +737,7 @@ def _persist_event_auth_chain_txn( event_to_auth_chain[event_id], r=2, ): if chain_links.exists_path_from( - *chain_map[start_auth_id], *chain_map[end_auth_id] + chain_map[start_auth_id], chain_map[end_auth_id] ): reduction.discard(end_auth_id) @@ -737,19 +748,19 @@ def _persist_event_auth_chain_txn( # Step 2a, add link between the event and auth event chain_links.add_link( - chain_id, sequence_number, auth_chain_id, auth_sequence_number + (chain_id, sequence_number), (auth_chain_id, auth_sequence_number) ) # Step 2b, add a link to chains reachable from the auth # event. for target_id, target_seq in chain_links.get_links_from( - auth_chain_id, auth_sequence_number + (auth_chain_id, auth_sequence_number) ): if target_id == chain_id: continue chain_links.add_link( - chain_id, sequence_number, target_id, target_seq + (chain_id, sequence_number), (target_id, target_seq) ) self.db_pool.simple_insert_many_txn( @@ -1901,10 +1912,8 @@ class _LinkMap: def add_link( self, - src_chain: int, - src_seq: int, - target_chain: int, - target_seq: int, + src_tuple: Tuple[int, int], + target_tuple: Tuple[int, int], new: bool = True, ) -> bool: """Add a new link between two chains, ensuring no redundant links are added. @@ -1912,16 +1921,17 @@ def add_link( New links should be added in topological order. Args: - src_chain: The chain ID of the source of the link, - src_seq: The sequence number of the source of the link, - target_chain: The chain ID of the target of the link, - target_seq: The sequence number of the target of the link, + src_tuple: The chain ID/sequence number of the source of the link. + target_tuple: The chain ID/sequence number of the target of the link. new: Whether this is a "new" link, i.e. should it be returned by `get_additions`. Returns: True if a link was added, false if the given link was dropped as redundant """ + src_chain, src_seq = src_tuple + target_chain, target_seq = target_tuple + current_links = self.maps.setdefault(src_chain, {}).setdefault(target_chain, {}) assert src_chain != target_chain @@ -1959,14 +1969,15 @@ def add_link( return True def get_links_from( - self, source_id: int, src_seq: int, + self, src_tuple: Tuple[int, int] ) -> Generator[Tuple[int, int], None, None]: """Gets the chains reachable from the given chain/sequence number. Yields: The chain ID and sequence number the link points to. """ - for target_id, sequence_numbers in self.maps.get(source_id, {}).items(): + src_chain, src_seq = src_tuple + for target_id, sequence_numbers in self.maps.get(src_chain, {}).items(): for link_src_seq, target_seq in sequence_numbers.items(): if link_src_seq <= src_seq: yield target_id, target_seq @@ -1995,11 +2006,14 @@ def get_additions(self) -> Generator[Tuple[int, int, int, int], None, None]: yield (src_chain, src_seq, target_chain, target_seq) def exists_path_from( - self, src_chain: int, src_seq: int, target_chain: int, target_seq: int, + self, src_tuple: Tuple[int, int], target_tuple: Tuple[int, int], ) -> bool: """Checks if there is a path between the source chain ID/sequence and target chain ID/sequence. 
""" + src_chain, src_seq = src_tuple + target_chain, target_seq = target_tuple + if src_chain == target_chain: return target_seq <= src_seq diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index fcd4b833e9f5..52d7d34b1757 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -217,13 +217,13 @@ def test_simple(self): for event in events[1:]: self.assertTrue( link_map.exists_path_from( - *chain_map[event.event_id], *chain_map[create.event_id] + chain_map[event.event_id], chain_map[create.event_id] ), ) self.assertFalse( link_map.exists_path_from( - *chain_map[create.event_id], *chain_map[event.event_id], + chain_map[create.event_id], chain_map[event.event_id], ), ) @@ -446,10 +446,8 @@ def fetch_chains( link_map = _LinkMap() for row in rows: added = link_map.add_link( - row["origin_chain_id"], - row["origin_sequence_number"], - row["target_chain_id"], - row["target_sequence_number"], + (row["origin_chain_id"], row["origin_sequence_number"]), + (row["target_chain_id"], row["target_sequence_number"]), ) # We shouldn't have persisted any redundant links @@ -464,24 +462,24 @@ def test_simple(self): """ link_map = _LinkMap() - link_map.add_link(1, 1, 2, 1, new=False) + link_map.add_link((1, 1), (2, 1), new=False) self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1)]) - self.assertCountEqual(link_map.get_links_from(1, 1), [(2, 1)]) + self.assertCountEqual(link_map.get_links_from((1, 1)), [(2, 1)]) self.assertCountEqual(link_map.get_additions(), []) - self.assertTrue(link_map.exists_path_from(1, 5, 2, 1)) - self.assertFalse(link_map.exists_path_from(1, 5, 2, 2)) - self.assertTrue(link_map.exists_path_from(1, 5, 1, 1)) - self.assertFalse(link_map.exists_path_from(1, 1, 1, 5)) + self.assertTrue(link_map.exists_path_from((1, 5), (2, 1))) + self.assertFalse(link_map.exists_path_from((1, 5), (2, 2))) + self.assertTrue(link_map.exists_path_from((1, 5), (1, 1))) + self.assertFalse(link_map.exists_path_from((1, 1), (1, 5))) # Attempting to add a redundant link is ignored. - self.assertFalse(link_map.add_link(1, 4, 2, 1)) + self.assertFalse(link_map.add_link((1, 4), (2, 1))) self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1)]) # Adding new non-redundant links works - self.assertTrue(link_map.add_link(1, 3, 2, 3)) + self.assertTrue(link_map.add_link((1, 3), (2, 3))) self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1), (3, 3)]) - self.assertTrue(link_map.add_link(2, 5, 1, 3)) + self.assertTrue(link_map.add_link((2, 5), (1, 3))) self.assertCountEqual(link_map.get_links_between(2, 1), [(5, 3)]) self.assertCountEqual(link_map.get_links_between(1, 2), [(1, 1), (3, 3)]) From 0f91c864fd5aa748e33a055663f6aa00de9a525f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 9 Dec 2020 15:46:13 +0000 Subject: [PATCH 36/44] Review comments --- .../databases/main/event_federation.py | 31 +++++++++---------- .../schema/delta/58/24_event_auth_chains.sql | 3 +- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 87c4d8e8360a..633ab47c398a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -189,7 +189,7 @@ def _get_auth_chain_difference_using_cover_index_txn( # All the chains that we've found that are reachable from the state # sets. 
From 0f91c864fd5aa748e33a055663f6aa00de9a525f Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 9 Dec 2020 15:46:13 +0000
Subject: [PATCH 36/44] Review comments

---
 .../databases/main/event_federation.py       | 31 +++++++++----------
 .../schema/delta/58/24_event_auth_chains.sql |  3 +-
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 87c4d8e8360a..633ab47c398a 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -189,7 +189,7 @@ def _get_auth_chain_difference_using_cover_index_txn(

         # All the chains that we've found that are reachable from the state
         # sets.
-        seen_chains = set()  # type: Set[str]
+        seen_chains = set()  # type: Set[int]

         sql = """
             SELECT event_id, chain_id, sequence_number
@@ -217,11 +217,9 @@ def _get_auth_chain_difference_using_cover_index_txn(
             for event_id in state_set:
                 chain_id, seq_no = chain_info[event_id]

-                curr = chains.setdefault(chain_id, seq_no)
-                if curr < seq_no:
-                    chains[chain_id] = seq_no
+                chains[chain_id] = max(seq_no, chains.get(chain_id, 0))

-        # Now we lok up all links for the chains we have, adding chains to
+        # Now we look up all links for the chains we have, adding chains to
         # set_to_chain that are reachable from each set.
         sql = """
             SELECT
@@ -231,7 +229,9 @@ def _get_auth_chain_difference_using_cover_index_txn(
             WHERE %s
         """

-        for batch in batch_iter(seen_chains, 1000):
+        # (We need to take a copy of `seen_chains` as we want to mutate it in
+        # the loop)
+        for batch in batch_iter(set(seen_chains), 1000):
             clause, args = make_in_list_sql_clause(
                 txn.database_engine, "origin_chain_id", batch
             )
@@ -248,11 +248,10 @@ def _get_auth_chain_difference_using_cover_index_txn(
                     # the link is less than the max sequence number in the
                     # origin chain.
                     if origin_sequence_number <= chains.get(origin_chain_id, 0):
-                        curr = chains.setdefault(
-                            target_chain_id, target_sequence_number
+                        chains[target_sequence_number] = max(
+                            target_sequence_number,
+                            chains.get(target_sequence_number, 0),
                         )
-                        if curr < target_sequence_number:
-                            chains[target_chain_id] = target_sequence_number

                         seen_chains.add(target_chain_id)

@@ -262,15 +261,13 @@ def _get_auth_chain_difference_using_cover_index_txn(
         # difference.
         result = set()

-        chain_to_gap = {}
+        # Mapping from chain ID to the range of sequence numbers that should be
+        # pulled from the database.
+        chain_to_gap = {}  # type: Dict[int, Tuple[int, int]]
+
         for chain_id in seen_chains:
             min_seq_no = min(chains.get(chain_id, 0) for chains in set_to_chain)
-
-            max_seq_no = 0
-            for chains in set_to_chain:
-                s = chains.get(chain_id)
-                if s:
-                    max_seq_no = max(max_seq_no, s)
+            max_seq_no = max(chains.get(chain_id, 0) for chains in set_to_chain)

             if min_seq_no < max_seq_no:
                 # We have a non empty gap, try and fill it from the events that
diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql
index 31e948acb992..729196cfd54f 100644
--- a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql
+++ b/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql
@@ -36,7 +36,8 @@ CREATE TABLE event_auth_chain_links (
 CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links (origin_chain_id, target_chain_id);


--- Events that we have persisted but not calculated auth chains for.
+-- Events that we have persisted but not calculated auth chains for,
+-- e.g. out of band memberships (where we don't have the auth chain)
 CREATE TABLE event_auth_chain_to_calculate (
     event_id TEXT PRIMARY KEY,
     room_id TEXT NOT NULL,
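
The link-propagation update in the hunk above is the step the next commit
corrects: the version here indexes `chains` by the target sequence number
rather than the target chain ID. A minimal sketch of the corrected update, on
invented toy values, looks like this:

# One state set, already reduced to {chain_id: max reachable sequence number}.
chains = {1: 3}
seen_chains = {1}

# An invented link: (chain 1, seq 2) points at (chain 7, seq 5).
origin_chain_id, origin_sequence_number = 1, 2
target_chain_id, target_sequence_number = 7, 5

# The link can be followed if its origin lies at or below the highest point
# reachable in the origin chain; the target chain then becomes reachable up
# to the link's target sequence number.
if origin_sequence_number <= chains.get(origin_chain_id, 0):
    chains[target_chain_id] = max(
        target_sequence_number, chains.get(target_chain_id, 0)
    )
    seen_chains.add(target_chain_id)

assert chains == {1: 3, 7: 5} and seen_chains == {1, 7}
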
From 888450a48ccc9170629f338810a41504659ccb5b Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 9 Dec 2020 16:35:40 +0000
Subject: [PATCH 37/44] Fix typo

---
 synapse/storage/databases/main/event_federation.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 633ab47c398a..f3fa8d3d5f56 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -248,9 +248,8 @@ def _get_auth_chain_difference_using_cover_index_txn(
                     # the link is less than the max sequence number in the
                     # origin chain.
                     if origin_sequence_number <= chains.get(origin_chain_id, 0):
-                        chains[target_sequence_number] = max(
-                            target_sequence_number,
-                            chains.get(target_sequence_number, 0),
+                        chains[target_chain_id] = max(
+                            target_sequence_number, chains.get(target_chain_id, 0),
                         )

                         seen_chains.add(target_chain_id)

From c9422b6d365fdd2b13286a086ecba4f347ca569d Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Tue, 5 Jan 2021 15:22:52 +0000
Subject: [PATCH 38/44] Handle rooms the server used to be in correctly.

We may not have an entry in the rooms table for old rooms that the
server is no longer in. If so then we should set the
`has_auth_chain_index` to false.
---
 synapse/storage/databases/main/room.py | 28 ++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index c7812dc7e8ec..d88a93470d14 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1179,6 +1179,18 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion):
         Called when we join a room over federation, and overwrites any room version
         currently in the table.
         """
+        # It's possible that we already have events for the room in our DB
+        # without a corresponding room entry. If we do then we don't want to
+        # mark the room as having an auth chain cover index.
+        max_ordering = await self.db_pool.simple_select_one_onecol(
+            table="events",
+            keyvalues={"room_id": room_id},
+            retcol="MAX(stream_ordering)",
+            allow_none=True,
+            desc="upsert_room_on_join",
+        )
+        has_auth_chain_index = max_ordering is None
+
         await self.db_pool.simple_upsert(
             desc="upsert_room_on_join",
             table="rooms",
@@ -1187,7 +1199,7 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion):
             insertion_values={
                 "is_public": False,
                 "creator": "",
-                "has_auth_chain_index": True,
+                "has_auth_chain_index": has_auth_chain_index,
             },
             # rooms has a unique constraint on room_id, so no need to lock when doing an
             # emulated upsert.
@@ -1252,6 +1264,18 @@ async def maybe_store_room_on_outlier_membership(
         When we receive an invite or any other event over federation that may relate to a room
         we are not in, store the version of the room if we don't already know the room version.
         """
+        # It's possible that we already have events for the room in our DB
+        # without a corresponding room entry. If we do then we don't want to
+        # mark the room as having an auth chain cover index.
+        max_ordering = await self.db_pool.simple_select_one_onecol(
+            table="events",
+            keyvalues={"room_id": room_id},
+            retcol="MAX(stream_ordering)",
+            allow_none=True,
+            desc="maybe_store_room_on_outlier_membership",
+        )
+        has_auth_chain_index = max_ordering is None
+
         await self.db_pool.simple_upsert(
             desc="maybe_store_room_on_outlier_membership",
             table="rooms",
@@ -1261,7 +1285,7 @@ async def maybe_store_room_on_outlier_membership(
                 "room_version": room_version.identifier,
                 "is_public": False,
                 "creator": "",
-                "has_auth_chain_index": True,
+                "has_auth_chain_index": has_auth_chain_index,
             },
             # rooms has a unique constraint on room_id, so no need to lock when doing an
             # emulated upsert.
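
The check added above leans on `MAX(stream_ordering)` being NULL exactly when
a room has no events, so only genuinely fresh rooms get marked as covered. A
self-contained toy illustration of that invariant, using an in-memory SQLite
table that only mimics the real `events` table (the helper name
`may_mark_room_indexed` is invented for the example):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE events (room_id TEXT, stream_ordering INTEGER)")
conn.execute("INSERT INTO events VALUES ('!old:example.org', 42)")

def may_mark_room_indexed(room_id: str) -> bool:
    # MAX over zero rows is NULL (None), so this is true only for rooms
    # with no pre-existing events.
    (max_ordering,) = conn.execute(
        "SELECT MAX(stream_ordering) FROM events WHERE room_id = ?", (room_id,)
    ).fetchone()
    return max_ordering is None

assert not may_mark_room_indexed("!old:example.org")  # pre-existing events
assert may_mark_room_indexed("!new:example.org")      # fresh room
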
From c8758afba59e54842f98f60fe1777ae93c6ef90c Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Wed, 6 Jan 2021 11:42:04 +0000
Subject: [PATCH 39/44] Handle case where we don't have chain info for an event

---
 .../databases/main/event_federation.py       |  48 ++++--
 tests/storage/test_event_federation.py       | 163 ++++++++++++++++++
 2 files changed, 199 insertions(+), 12 deletions(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index f3fa8d3d5f56..8326640d2064 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -25,6 +25,7 @@
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.signatures import SignatureWorkerStore
 from synapse.storage.engines import PostgresEngine
+from synapse.storage.types import Cursor
 from synapse.types import Collection
 from synapse.util.caches.descriptors import cached
 from synapse.util.caches.lrucache import LruCache
@@ -33,6 +34,11 @@
 logger = logging.getLogger(__name__)


+class _NoChainCoverIndex(Exception):
+    def __init__(self, room_id: str):
+        super().__init__("Unexpectedly no chain cover for events in %s" % (room_id,))
+
+
 class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBaseStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
@@ -156,20 +162,26 @@ async def get_auth_chain_difference(
         # algorithm.
         room = await self.get_room(room_id)
         if room["has_auth_chain_index"]:
-            return await self.db_pool.runInteraction(
-                "get_auth_chain_difference_chains",
-                self._get_auth_chain_difference_using_cover_index_txn,
-                state_sets,
-            )
-        else:
-            return await self.db_pool.runInteraction(
-                "get_auth_chain_difference",
-                self._get_auth_chain_difference_txn,
-                state_sets,
-            )
+            try:
+                return await self.db_pool.runInteraction(
+                    "get_auth_chain_difference_chains",
+                    self._get_auth_chain_difference_using_cover_index_txn,
+                    room_id,
+                    state_sets,
+                )
+            except _NoChainCoverIndex:
+                # For whatever reason we don't actually have a chain cover index
+                # for the events in question, so we fall back to the old method.
+                pass
+
+        return await self.db_pool.runInteraction(
+            "get_auth_chain_difference",
+            self._get_auth_chain_difference_txn,
+            state_sets,
+        )

     def _get_auth_chain_difference_using_cover_index_txn(
-        self, txn, state_sets: List[Set[str]]
+        self, txn: Cursor, room_id: str, state_sets: List[Set[str]]
     ) -> Set[str]:
         """Calculates the auth chain difference using the chain index.
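
The dispatch above follows a try-the-fast-path, fall-back-on-failure pattern.
A stripped-down, self-contained sketch of the same shape (the helper names
and the stand-in set arithmetic are invented for the example, not Synapse's
actual difference algorithms):

from typing import List, Set

class NoChainCoverIndex(Exception):
    """Raised by the fast path when it finds it cannot be used after all."""

def difference_fast(state_sets: List[Set[str]]) -> Set[str]:
    # Stand-in for the cover-index transaction: pretend an event turned out
    # to have no chain info (e.g. it was persisted before the index existed).
    raise NoChainCoverIndex()

def difference_slow(state_sets: List[Set[str]]) -> Set[str]:
    # Stand-in for the original breadth-first algorithm (this set arithmetic
    # is a placeholder, not the real auth chain walk).
    return set().union(*state_sets) - set.intersection(*state_sets)

def get_difference(state_sets: List[Set[str]], has_index: bool) -> Set[str]:
    if has_index:
        try:
            return difference_fast(state_sets)
        except NoChainCoverIndex:
            pass  # fall back to the old method
    return difference_slow(state_sets)

assert get_difference([{"a", "c"}, {"b", "c"}], has_index=True) == {"a", "b"}
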
@@ -207,6 +219,18 @@ def _get_auth_chain_difference_using_cover_index_txn(
                 seen_chains.add(chain_id)
                 chain_to_event.setdefault(chain_id, {})[sequence_number] = event_id

+        # Check that we actually have a chain ID for all the events.
+        events_missing_chain_info = initial_events.difference(chain_info)
+        if events_missing_chain_info:
+            # This can happen due to e.g. downgrade/upgrade of the server. We
+            # raise an exception and fall back to the previous algorithm.
+            logger.info(
+                "Unexpectedly found that events don't have chain IDs in room %s: %s",
+                room_id,
+                events_missing_chain_info,
+            )
+            raise _NoChainCoverIndex(room_id)
+
         # Corresponds to `state_sets`, except as a map from chain ID to max
         # sequence number reachable from the state set.
         set_to_chain = []  # type: List[Dict[int, int]]
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 3adf405edb27..b3d736b641a8 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -259,6 +259,169 @@ def insert_event(txn):
         )
         self.assertSetEqual(difference, set())

+    def test_auth_difference_partial_cover(self):
+        """Test that we correctly handle rooms where not all events have a chain
+        cover calculated. This can happen due to a downgrade/upgrade.
+        """
+
+        room_id = "@ROOM:local"
+
+        # The silly auth graph we use to test the auth difference algorithm,
+        # where the top are the most recent events.
+        #
+        #     A   B
+        #      \ /
+        #   D   E
+        #    \  |
+        #     ` F   C
+        #       |  /|
+        #       G ´ |
+        #       | \ |
+        #       H   I
+        #       |   |
+        #       K   J
+
+        auth_graph = {
+            "a": ["e"],
+            "b": ["e"],
+            "c": ["g", "i"],
+            "d": ["f"],
+            "e": ["f"],
+            "f": ["g"],
+            "g": ["h", "i"],
+            "h": ["k"],
+            "i": ["j"],
+            "k": [],
+            "j": [],
+        }
+
+        depth_map = {
+            "a": 7,
+            "b": 7,
+            "c": 4,
+            "d": 6,
+            "e": 6,
+            "f": 5,
+            "g": 3,
+            "h": 2,
+            "i": 2,
+            "k": 1,
+            "j": 1,
+        }
+
+        # We rudely fiddle with the appropriate tables directly, as that's much
+        # easier than constructing events properly.
+
+        def insert_event(txn):
+            # First insert the room and mark it has having a chain cover.
+            self.store.db_pool.simple_insert_txn(
+                txn,
+                "rooms",
+                {
+                    "room_id": room_id,
+                    "creator": "room_creator_user_id",
+                    "is_public": True,
+                    "room_version": "6",
+                    "has_auth_chain_index": True,
+                },
+            )
+
+            stream_ordering = 0
+
+            for event_id in auth_graph:
+                stream_ordering += 1
+                depth = depth_map[event_id]
+
+                self.store.db_pool.simple_insert_txn(
+                    txn,
+                    table="events",
+                    values={
+                        "event_id": event_id,
+                        "room_id": room_id,
+                        "depth": depth,
+                        "topological_ordering": depth,
+                        "type": "m.test",
+                        "processed": True,
+                        "outlier": False,
+                        "stream_ordering": stream_ordering,
+                    },
+                )
+
+            # Insert all events apart from 'B'
+            self.hs.datastores.persist_events._persist_event_auth_chain_txn(
+                txn,
+                [
+                    FakeEvent(event_id, room_id, auth_graph[event_id])
+                    for event_id in auth_graph
+                    if event_id != "b"
+                ],
+            )
+
+            # Now we insert the event 'B' without a chain cover, by temporarily
+            # pretending the room doesn't have a chain cover.
+
+            self.store.db_pool.simple_update_txn(
+                txn,
+                table="rooms",
+                keyvalues={"room_id": room_id},
+                updatevalues={"has_auth_chain_index": False},
+            )
+
+            self.hs.datastores.persist_events._persist_event_auth_chain_txn(
+                txn, [FakeEvent("b", room_id, auth_graph["b"])],
+            )
+
+            self.store.db_pool.simple_update_txn(
+                txn,
+                table="rooms",
+                keyvalues={"room_id": room_id},
+                updatevalues={"has_auth_chain_index": True},
+            )
+
+        self.get_success(self.store.db_pool.runInteraction("insert", insert_event,))
+
+        # Now actually test that various combinations give the right result:
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}])
+        )
+        self.assertSetEqual(difference, {"a", "b"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"c"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "c", "e", "f"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a", "c"}, {"b"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "c"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a", "c"}, {"b", "c"}])
+        )
+        self.assertSetEqual(difference, {"a", "b"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"d"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "d", "e"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"c"}, {"d"}])
+        )
+        self.assertSetEqual(difference, {"a", "b", "c", "d", "e", "f"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"e"}])
+        )
+        self.assertSetEqual(difference, {"a", "b"})
+
+        difference = self.get_success(
+            self.store.get_auth_chain_difference(room_id, [{"a"}])
+        )
+        self.assertSetEqual(difference, set())
+

 @attr.s
 class FakeEvent:

From d64f5f856e897f7f5ab0604fa63fc8d7e1311f72 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 11 Jan 2021 15:00:24 +0000
Subject: [PATCH 40/44] Typo

Co-authored-by: Patrick Cloke
---
 tests/storage/test_event_federation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index b3d736b641a8..cdf3222c0105 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -313,7 +313,7 @@ def test_auth_difference_partial_cover(self):
         # easier than constructing events properly.

         def insert_event(txn):
-            # First insert the room and mark it has having a chain cover.
+            # First insert the room and mark it as having a chain cover.
             self.store.db_pool.simple_insert_txn(
                 txn,
                 "rooms",

From 6071effb477d5f076021fba902fd756f3abff2d0 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 11 Jan 2021 15:10:31 +0000
Subject: [PATCH 41/44] Split out a has_auth_chain_index

---
 synapse/storage/databases/main/room.py | 49 +++++++++++++++++---------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
index fe8c7995d706..284f2ce77c29 100644
--- a/synapse/storage/databases/main/room.py
+++ b/synapse/storage/databases/main/room.py
@@ -1166,6 +1166,37 @@ def set_room_is_public(self, room_id, is_public):
         # It's overridden by RoomStore for the synapse master.
         raise NotImplementedError()

+    async def has_auth_chain_index(self, room_id: str) -> bool:
+        """Check if the room has (or can have) a chain cover index.
+
+        Defaults to True if we don't have an entry in `rooms` table nor any
+        events for the room.
+        """
+
+        has_auth_chain_index = await self.db_pool.simple_select_one_onecol(
+            table="rooms",
+            keyvalues={"room_id": room_id},
+            retcol="has_auth_chain_index",
+            desc="has_auth_chain_index",
+            allow_none=True,
+        )
+
+        if has_auth_chain_index:
+            return True
+
+        # It's possible that we already have events for the room in our DB
+        # without a corresponding room entry. If we do then we don't want to
+        # mark the room as having an auth chain cover index.
+        max_ordering = await self.db_pool.simple_select_one_onecol(
+            table="events",
+            keyvalues={"room_id": room_id},
+            retcol="MAX(stream_ordering)",
+            allow_none=True,
+            desc="upsert_room_on_join",
+        )
+
+        return max_ordering is None
+

 class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
@@ -1182,14 +1213,7 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion):
         # It's possible that we already have events for the room in our DB
         # without a corresponding room entry. If we do then we don't want to
         # mark the room as having an auth chain cover index.
-        max_ordering = await self.db_pool.simple_select_one_onecol(
-            table="events",
-            keyvalues={"room_id": room_id},
-            retcol="MAX(stream_ordering)",
-            allow_none=True,
-            desc="upsert_room_on_join",
-        )
-        has_auth_chain_index = max_ordering is None
+        has_auth_chain_index = await self.has_auth_chain_index(room_id)

         await self.db_pool.simple_upsert(
             desc="upsert_room_on_join",
@@ -1267,14 +1291,7 @@ async def maybe_store_room_on_outlier_membership(
         # It's possible that we already have events for the room in our DB
         # without a corresponding room entry. If we do then we don't want to
        # mark the room as having an auth chain cover index.
-        max_ordering = await self.db_pool.simple_select_one_onecol(
-            table="events",
-            keyvalues={"room_id": room_id},
-            retcol="MAX(stream_ordering)",
-            allow_none=True,
-            desc="maybe_store_room_on_outlier_membership",
-        )
-        has_auth_chain_index = max_ordering is None
+        has_auth_chain_index = await self.has_auth_chain_index(room_id)

         await self.db_pool.simple_upsert(
             desc="maybe_store_room_on_outlier_membership",

From 368d3b84795fcca256c374cc8220d8942dc69c6b Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 11 Jan 2021 15:13:02 +0000
Subject: [PATCH 42/44] Update docstring

---
 tests/storage/test_event_federation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index cdf3222c0105..9d04a066d838 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -261,7 +261,9 @@ def insert_event(txn):

     def test_auth_difference_partial_cover(self):
         """Test that we correctly handle rooms where not all events have a chain
-        cover calculated. This can happen due to a downgrade/upgrade.
+        cover calculated. This can happen in some obscure edge cases, including
+        during the background update that calculates the chain cover for old
+        rooms.
""" room_id = "@ROOM:local" From bea2c47aaa7a9ee18bfe553a78b089192b4583c4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jan 2021 15:14:22 +0000 Subject: [PATCH 43/44] Move to schema 59 --- .../{58/24_event_auth_chains.sql => 59/04_event_auth_chains.sql} | 0 .../04_event_auth_chains.sql.postgres} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename synapse/storage/databases/main/schema/delta/{58/24_event_auth_chains.sql => 59/04_event_auth_chains.sql} (100%) rename synapse/storage/databases/main/schema/delta/{58/24_event_auth_chains.sql.postgres => 59/04_event_auth_chains.sql.postgres} (100%) diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql b/synapse/storage/databases/main/schema/delta/59/04_event_auth_chains.sql similarity index 100% rename from synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql rename to synapse/storage/databases/main/schema/delta/59/04_event_auth_chains.sql diff --git a/synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql.postgres b/synapse/storage/databases/main/schema/delta/59/04_event_auth_chains.sql.postgres similarity index 100% rename from synapse/storage/databases/main/schema/delta/58/24_event_auth_chains.sql.postgres rename to synapse/storage/databases/main/schema/delta/59/04_event_auth_chains.sql.postgres From 8c1e32c9af9684bc79077f981aa921d92ec06933 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Jan 2021 17:00:19 +0000 Subject: [PATCH 44/44] Fix tests after merge from develop --- tests/storage/test_event_chain.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index 52d7d34b1757..83c377824b1f 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -387,20 +387,6 @@ def _persist(txn): # tables. persist_events_store._store_event_txn(txn, [(e, {}) for e in events]) - persist_events_store.db_pool.simple_insert_many_txn( - txn, - table="state_events", - values=[ - { - "event_id": event.event_id, - "room_id": event.room_id, - "type": event.type, - "state_key": event.state_key, - } - for event in events - ], - ) - # Actually call the function that calculates the auth chain stuff. persist_events_store._persist_event_auth_chain_txn(txn, events)