Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Make sqlite database migrations transactional again, part two #14926

Merged
merged 2 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/14926.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a regression introduced in Synapse 1.69.0 which can result in database corruption when database migrations are interrupted on sqlite.
5 changes: 3 additions & 2 deletions synapse/storage/engines/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ def executescript(cursor: CursorType, script: str) -> None:

This is not provided by DBAPI2, and so needs engine-specific support.

Some database engines may automatically COMMIT the ongoing transaction both
before and after executing the script.
Some database engines may automatically COMMIT the ongoing transaction before
executing the script in its own transaction. The script transaction is left
open and it is the responsibility of the caller to commit it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some database engines may automatically COMMIT the ongoing transaction

I wonder if we should make this a hard guarantee of this function, so that we have the same semantics across postgres and sqlite.

For example, suppose we need to apply two migrations A and B to upgrade the database schema. IIUC, under postgres, synapse will run

BEGIN TRANSACTION;
...
<A>;
<B>;
COMMIT;

whereas under sqlite we will now run

BEGIN TRANSACTION;
...
COMMIT; BEGIN TRANSACTION;
<A>;
COMMIT; BEGIN TRANSACTION;
<B>;
COMMIT;

I'm not sure if that's a difference we should worry about. WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm in favour of the idea. It's much easier to reason about things when both database engines behave the same way.

I gave that a go in bb340f7. Let's hope it passes CI.

"""
...

Expand Down
6 changes: 4 additions & 2 deletions synapse/storage/engines/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,16 @@ def executescript(cursor: sqlite3.Cursor, script: str) -> None:
> than one statement with it, it will raise a Warning. Use executescript() if
> you want to execute multiple SQL statements with one call.

The script is wrapped in transaction control statemnets, since the docs for
The script is prefixed with a `BEGIN TRANSACTION`, since the docs for
`executescript` warn:

> If there is a pending transaction, an implicit COMMIT statement is executed
> first. No other implicit transaction control is performed; any transaction
> control must be added to sql_script.
"""
cursor.executescript(f"BEGIN TRANSACTION;\n{script}\nCOMMIT;")
# The implementation of `executescript` can be found at
# https://github.com/python/cpython/blob/3.11/Modules/_sqlite/cursor.c#L1035.
cursor.executescript(f"BEGIN TRANSACTION; {script}")


# Following functions taken from: https://github.com/coleifer/peewee
Expand Down
96 changes: 96 additions & 0 deletions tests/storage/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from synapse.server import HomeServer
from synapse.storage.database import (
DatabasePool,
LoggingDatabaseConnection,
LoggingTransaction,
make_tuple_comparison_clause,
)
Expand All @@ -37,6 +38,101 @@ def test_native_tuple_comparison(self) -> None:
self.assertEqual(args, [1, 2])


class ExecuteScriptTestCase(unittest.HomeserverTestCase):
"""Tests for `BaseDatabaseEngine.executescript` implementations."""
DMRobertson marked this conversation as resolved.
Show resolved Hide resolved

def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.store = hs.get_datastores().main
self.db_pool: DatabasePool = self.store.db_pool
self.get_success(
self.db_pool.runInteraction(
"create",
lambda txn: txn.execute("CREATE TABLE foo (name TEXT PRIMARY KEY)"),
)
)

def test_transaction(self) -> None:
"""Test that all statements are run in a single transaction."""

def run(conn: LoggingDatabaseConnection) -> None:
cur = conn.cursor(txn_name="test_transaction")
self.db_pool.engine.executescript(
cur,
";".join(
[
"INSERT INTO foo (name) VALUES ('transaction test')",
# This next statement will fail. When `executescript` is not
# transactional, the previous row will be observed later.
"INSERT INTO foo (name) VALUES ('transaction test')",
]
),
)

self.get_failure(
self.db_pool.runWithConnection(run),
self.db_pool.engine.module.IntegrityError,
)

self.assertIsNone(
self.get_success(
self.db_pool.simple_select_one_onecol(
"foo",
keyvalues={"name": "transaction test"},
retcol="name",
allow_none=True,
)
),
"executescript is not running statements inside a transaction",
)

def test_commit(self) -> None:
"""Test that the script transaction remains open and can be committed."""

def run(conn: LoggingDatabaseConnection) -> None:
cur = conn.cursor(txn_name="test_commit")
self.db_pool.engine.executescript(
cur, "INSERT INTO foo (name) VALUES ('commit test')"
)
cur.execute("COMMIT")

self.get_success(self.db_pool.runWithConnection(run))

self.assertIsNotNone(
self.get_success(
self.db_pool.simple_select_one_onecol(
"foo",
keyvalues={"name": "commit test"},
retcol="name",
allow_none=True,
)
),
)

def test_rollback(self) -> None:
"""Test that the script transaction remains open and can be rolled back."""

def run(conn: LoggingDatabaseConnection) -> None:
cur = conn.cursor(txn_name="test_rollback")
self.db_pool.engine.executescript(
cur, "INSERT INTO foo (name) VALUES ('rollback test')"
)
cur.execute("ROLLBACK")

self.get_success(self.db_pool.runWithConnection(run))

self.assertIsNone(
self.get_success(
self.db_pool.simple_select_one_onecol(
"foo",
keyvalues={"name": "rollback test"},
retcol="name",
allow_none=True,
)
),
"executescript is not leaving the script transaction open",
)


class CallbacksTestCase(unittest.HomeserverTestCase):
"""Tests for transaction callbacks."""

Expand Down