From 8d486b379e37d2823903ac3c1b56df550bce3e43 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 27 Sep 2022 17:31:49 +0100 Subject: [PATCH 1/3] Update UPSERT comment now that native upserts are the default --- synapse/storage/database.py | 51 +++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 6cc88aad3238..2993d1446f12 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1141,17 +1141,48 @@ async def simple_upsert( desc: str = "simple_upsert", lock: bool = True, ) -> bool: - """ + """Insert a row with values + insertion_values; on conflict, update with values. + + All of our supported databases accept the nonstandard "upsert" statement in + their dialect of SQL. We call this a "native upsert". The syntax looks roughly + like: + + INSERT INTO table VALUES (values + insertion_values) + ON CONFLICT (keyvalues) + DO UPDATE SET (values); -- overwrite `values` columns only + + If (values) is empty, the resulting query is slighlty simpler: + + INSERT INTO table VALUES (insertion_values) + ON CONFLICT (keyvalues) + DO NOTHING; -- do not overwrite any columns + + This function is a helper to build such queries. + + In order for upserts to make sense, the database must be able to determine when + an upsert CONFLICTs with an existing row. Postgres and SQLite ensure this by + requiring that a unique index exist on the column names used to detect a + conflict (i.e. `keyvalues.keys()`). + + If there is no such index, we can "emulate" an upsert with a SELECT followed + by either an INSERT or an UPDATE. This is unsafe: we cannot make the same + atomicity guarantees that a native upsert can and are very vulnerable to races + and crashes. Therefore if we wish to upsert without an appropriate unique index, + we must either: + + 1. Acquire a table-level lock before the emulated upsert (`lock=True`), or + 2. VERY CAREFULLY ensure that we are the only thread and worker which will be + writing to this table, in which case we can proceed without a lock + (`lock=False`). + + Generally speaking, you should use `lock=True`. If the table in question has a + unique index[*], this class will use a native upsert (which is atomic and so can + ignore the `lock` argument). Otherwise this class will use an emulated upsert, + in which case we want the safer option unless we been VERY CAREFUL. - `lock` should generally be set to True (the default), but can be set - to False if either of the following are true: - 1. there is a UNIQUE INDEX on the key columns. In this case a conflict - will cause an IntegrityError in which case this function will retry - the update. - 2. we somehow know that we are the only thread which will be updating - this table. - As an additional note, this parameter only matters for old SQLite versions - because we will use native upserts otherwise. + [*]: This class is aware that some tables have unique indices added as + background updates. It checks at runtime to see if those updates are + pending: if not, they are deemed safe for use with native upserts. Args: table: The table to upsert into From 089ace2c156de31f7eb2cb14e6c50beabac1138c Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 27 Sep 2022 17:34:49 +0100 Subject: [PATCH 2/3] Changelog --- changelog.d/13924.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13924.misc diff --git a/changelog.d/13924.misc b/changelog.d/13924.misc new file mode 100644 index 000000000000..7770b6f03f2b --- /dev/null +++ b/changelog.d/13924.misc @@ -0,0 +1 @@ +Update an innaccurate comment in Synapse's upsert database helper. From c46f10340a36f2e3255c576c6e0679cc075cb29d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 29 Sep 2022 18:46:30 +0100 Subject: [PATCH 3/3] Reword comment --- synapse/storage/database.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 2993d1446f12..bb28ded1b5a1 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1180,9 +1180,18 @@ async def simple_upsert( ignore the `lock` argument). Otherwise this class will use an emulated upsert, in which case we want the safer option unless we been VERY CAREFUL. - [*]: This class is aware that some tables have unique indices added as - background updates. It checks at runtime to see if those updates are - pending: if not, they are deemed safe for use with native upserts. + [*]: Some tables have unique indices added to them in the background. Those + tables `T` are keys in the dictionary UNIQUE_INDEX_BACKGROUND_UPDATES, + where `T` maps to the background update that adds a unique index to `T`. + This dictionary is maintained by hand. + + At runtime, we constantly check to see if each of these background updates + has run. If so, we deem the coresponding table safe to upsert into, because + we can now use a native insert to do so. If not, we deem the table unsafe + to upsert into and require an emulated upsert. + + Tables that do not appear in this dictionary are assumed to have an + appropriate unique index and therefore be safe to upsert into. Args: table: The table to upsert into