From 0a18cb0bbc80f28555d85faf49b86b5c97ea51b7 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Wed, 12 Jan 2022 12:34:55 -0800 Subject: [PATCH] Use special "deleted" journalist for associations with deleted users Currently when a journalist is deleted, most referential tables are updated with `journalist_id=NULL`, forcing all clients to accomodate that case. Instead, we are now going to either re-associate those rows with a special "deleted" journalist account, or delete them outright. All journalist_id columns are now NOT NULL to enforce this. Tables with rows migrated to "deleted": * replies * seen_files * seen_messages * seen_replies Tables with rows that are deleted outright: * journalist_login_attempt * revoked_tokens The "deleted" journalist account is a real account that exists in the database, but cannot be logged into and has no passphase set. It is not possible to delete it nor is it shown in the admin listing of journalists. It is lazily created on demand using a special DeletedJournalist subclass that bypasses username and passphrase validation. Journalist objects must now be deleted by calling the new delete() function on them. Trying to directly `db.session.delete(journalist)` will most likely fail with an Exception because of rows that weren't migrated first. The migration step looks for any existing rows in those tables with journalist_id=NULL and either migrates them to "deleted" or deletes them. Then all the columns are changed to be NOT NULL. Fixes #6192. --- ...c7536e8_make_journalist_id_non_nullable.py | 128 ++++++++++++++++++ securedrop/journalist_app/admin.py | 10 +- securedrop/loaddata.py | 2 +- securedrop/models.py | 84 +++++++++--- .../migrations/migration_2e24fc7536e8.py | 106 +++++++++++++++ securedrop/tests/test_journalist.py | 49 +++++++ securedrop/tests/utils/db_helper.py | 2 +- 7 files changed, 360 insertions(+), 21 deletions(-) create mode 100644 securedrop/alembic/versions/2e24fc7536e8_make_journalist_id_non_nullable.py create mode 100644 securedrop/tests/migrations/migration_2e24fc7536e8.py diff --git a/securedrop/alembic/versions/2e24fc7536e8_make_journalist_id_non_nullable.py b/securedrop/alembic/versions/2e24fc7536e8_make_journalist_id_non_nullable.py new file mode 100644 index 00000000000..217cd5676f5 --- /dev/null +++ b/securedrop/alembic/versions/2e24fc7536e8_make_journalist_id_non_nullable.py @@ -0,0 +1,128 @@ +"""make journalist_id non-nullable + +Revision ID: 2e24fc7536e8 +Revises: de00920916bf +Create Date: 2022-01-12 19:31:06.186285 + +""" +import uuid + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = '2e24fc7536e8' +down_revision = 'de00920916bf' +branch_labels = None +depends_on = None + + +def create_deleted() -> int: + """manually insert a "deleted" journalist user. + + We need to do it this way since the model will reflect the current state of + the schema, not what it is at the current migration step + + It should be basically identical to what DeletedJournalist.get() does + """ + op.execute(sa.text( + f"""\ + INSERT INTO journalists (uuid, username, session_nonce) + VALUES (:uuid, "deleted", 0); + """ + ).bindparams(uuid=str(uuid.uuid4()))) + # Get the autoincrement ID back + conn = op.get_bind() + result = conn.execute('SELECT id FROM journalists WHERE username="deleted";').fetchall() + return result[0][0] + + +def migrate_nulls(): + """migrate existing journalist_id=NULL over to deleted or delete them""" + op.execute("DELETE FROM journalist_login_attempt WHERE journalist_id IS NULL;") + op.execute("DELETE FROM revoked_tokens WHERE journalist_id IS NULL;") + # Look to see if we have data to migrate + tables = ('replies', 'seen_files', 'seen_messages', 'seen_replies') + needs_migration = [] + conn = op.get_bind() + for table in tables: + result = conn.execute(f'SELECT 1 FROM {table} WHERE journalist_id IS NULL;').first() + if result is not None: + needs_migration.append(table) + + if not needs_migration: + return + + deleted_id = create_deleted() + for table in needs_migration: + op.execute(sa.text( + f'UPDATE {table} SET journalist_id=:journalist_id WHERE journalist_id IS NULL;' + ).bindparams(journalist_id=deleted_id)) + + +def upgrade(): + migrate_nulls() + + with op.batch_alter_table('journalist_login_attempt', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=False) + + with op.batch_alter_table('replies', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=False) + + with op.batch_alter_table('revoked_tokens', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=False) + + with op.batch_alter_table('seen_files', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=False) + + with op.batch_alter_table('seen_messages', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=False) + + with op.batch_alter_table('seen_replies', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=False) + + +def downgrade(): + # We do not un-migrate the data back to journalist_id=NULL + + with op.batch_alter_table('seen_replies', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=True) + + with op.batch_alter_table('seen_messages', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=True) + + with op.batch_alter_table('seen_files', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=True) + + with op.batch_alter_table('revoked_tokens', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=True) + + with op.batch_alter_table('replies', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=True) + + with op.batch_alter_table('journalist_login_attempt', schema=None) as batch_op: + batch_op.alter_column('journalist_id', + existing_type=sa.INTEGER(), + nullable=True) diff --git a/securedrop/journalist_app/admin.py b/securedrop/journalist_app/admin.py index e23f98b3105..6e895f7d2e9 100644 --- a/securedrop/journalist_app/admin.py +++ b/securedrop/journalist_app/admin.py @@ -30,7 +30,7 @@ def make_blueprint(config: SDConfig) -> Blueprint: @view.route('/', methods=('GET', 'POST')) @admin_required def index() -> str: - users = Journalist.query.all() + users = Journalist.query.filter(Journalist.username != "deleted").all() return render_template("admin.html", users=users) @view.route('/config', methods=('GET', 'POST')) @@ -288,8 +288,14 @@ def delete_user(user_id: int) -> werkzeug.Response: current_app.logger.error( "Admin {} tried to delete itself".format(g.user.username)) abort(403) + elif user.username == "deleted": + # Do not flash because the interface does not expose this. + # It can only happen by manually crafting a POST request + current_app.logger.error( + "Admin {} tried to delete \"deleted\" user".format(g.user.username)) + abort(403) elif user: - db.session.delete(user) + user.delete() db.session.commit() flash(gettext("Deleted user '{user}'.").format( user=user.username), "notification") diff --git a/securedrop/loaddata.py b/securedrop/loaddata.py index e56ef082785..e0c8fb6c563 100755 --- a/securedrop/loaddata.py +++ b/securedrop/loaddata.py @@ -382,7 +382,7 @@ def load(args: argparse.Namespace) -> None: # delete one journalist _, _, journalist_to_be_deleted = journalists - db.session.delete(journalist_to_be_deleted) + journalist_to_be_deleted.delete() db.session.commit() diff --git a/securedrop/models.py b/securedrop/models.py index b7771b2dc6d..5c4db14bd62 100644 --- a/securedrop/models.py +++ b/securedrop/models.py @@ -2,6 +2,7 @@ import binascii import datetime import base64 +import itertools import os import pyotp import qrcode @@ -254,7 +255,7 @@ class Reply(db.Model): id = Column(Integer, primary_key=True) uuid = Column(String(36), unique=True, nullable=False) - journalist_id = Column(Integer, ForeignKey('journalists.id')) + journalist_id = Column(Integer, ForeignKey('journalists.id'), nullable=False) journalist = relationship( "Journalist", backref=backref( @@ -294,18 +295,16 @@ def __repr__(self) -> str: return '' % (self.filename) def to_json(self) -> 'Dict[str, Any]': - journalist_username = "deleted" - journalist_first_name = "" - journalist_last_name = "" - journalist_uuid = "deleted" - if self.journalist: - journalist_username = self.journalist.username - journalist_first_name = self.journalist.first_name - journalist_last_name = self.journalist.last_name + journalist_username = self.journalist.username + journalist_first_name = self.journalist.first_name or "" + journalist_last_name = self.journalist.last_name or "" + if journalist_username == "deleted": + # special-case the "deleted" user + journalist_uuid = "deleted" + else: journalist_uuid = self.journalist.uuid seen_by = [ r.journalist.uuid for r in SeenReply.query.filter(SeenReply.reply_id == self.id) - if r.journalist ] json_reply = { 'source_url': url_for('api.single_source', @@ -434,10 +433,17 @@ class Journalist(db.Model): ) # type: Column[Optional[datetime.datetime]] last_access = Column(DateTime) # type: Column[Optional[datetime.datetime]] passphrase_hash = Column(String(256)) # type: Column[Optional[str]] + login_attempts = relationship( "JournalistLoginAttempt", - backref="journalist" + backref="journalist", + cascade="all, delete" ) # type: RelationshipProperty[JournalistLoginAttempt] + revoked_tokens = relationship( + "RevokedToken", + backref="journalist", + cascade="all, delete" + ) # type: RelationshipProperty[RevokedToken] MIN_USERNAME_LEN = 3 MIN_NAME_LEN = 0 @@ -446,7 +452,7 @@ class Journalist(db.Model): def __init__(self, username: str, - password: str, + password: 'Optional[str]', first_name: 'Optional[str]' = None, last_name: 'Optional[str]' = None, is_admin: bool = False, @@ -784,13 +790,57 @@ def to_json(self, all_info: bool = True) -> Dict[str, Any]: return json_user + def delete(self) -> None: + """delete a journalist, migrating some data over to the "deleted" journalist + + Callers must commit the session themselves + """ + deleted = DeletedJournalist.get() + # All the rows that should be reassociated with the "deleted" journalist + reassociate = itertools.chain( + Reply.query.filter_by(journalist_id=self.id).all(), + SeenFile.query.filter_by(journalist_id=self.id).all(), + SeenMessage.query.filter_by(journalist_id=self.id).all(), + SeenReply.query.filter_by(journalist_id=self.id).all() + ) + for obj in reassociate: + obj.journalist_id = deleted.id + db.session.add(obj) + + # For the rest of the associated data we rely on cascading deletions + db.session.delete(self) + + +class DeletedJournalist(Journalist): + """A system user that represents deleted journalists for referential integrity""" + def __init__(self) -> None: + super().__init__(username='deleted', password=None) + + @staticmethod + def get() -> 'Journalist': + deleted = Journalist.query.filter_by(username='deleted').one_or_none() + if deleted is None: + # Lazily create + deleted = DeletedJournalist() + db.session.add(deleted) + return deleted + + @classmethod + def check_username_acceptable(cls, username: str) -> None: + """override parent to disable this check""" + return None + + def set_password(self, passphrase: 'Optional[str]') -> None: + """override parent to disable this, we set no password""" + return None + class SeenFile(db.Model): __tablename__ = "seen_files" __table_args__ = (db.UniqueConstraint("file_id", "journalist_id"),) id = Column(Integer, primary_key=True) file_id = Column(Integer, ForeignKey("submissions.id"), nullable=False) - journalist_id = Column(Integer, ForeignKey("journalists.id"), nullable=True) + journalist_id = Column(Integer, ForeignKey("journalists.id"), nullable=False) file = relationship( "Submission", backref=backref("seen_files", lazy="dynamic", cascade="all,delete") ) # type: RelationshipProperty[Submission] @@ -804,7 +854,7 @@ class SeenMessage(db.Model): __table_args__ = (db.UniqueConstraint("message_id", "journalist_id"),) id = Column(Integer, primary_key=True) message_id = Column(Integer, ForeignKey("submissions.id"), nullable=False) - journalist_id = Column(Integer, ForeignKey("journalists.id"), nullable=True) + journalist_id = Column(Integer, ForeignKey("journalists.id"), nullable=False) message = relationship( "Submission", backref=backref("seen_messages", lazy="dynamic", cascade="all,delete") ) # type: RelationshipProperty[Submission] @@ -818,7 +868,7 @@ class SeenReply(db.Model): __table_args__ = (db.UniqueConstraint("reply_id", "journalist_id"),) id = Column(Integer, primary_key=True) reply_id = Column(Integer, ForeignKey("replies.id"), nullable=False) - journalist_id = Column(Integer, ForeignKey("journalists.id"), nullable=True) + journalist_id = Column(Integer, ForeignKey("journalists.id"), nullable=False) reply = relationship( "Reply", backref=backref("seen_replies", cascade="all,delete") ) # type: RelationshipProperty[Reply] @@ -838,7 +888,7 @@ class JournalistLoginAttempt(db.Model): DateTime, default=datetime.datetime.utcnow ) # type: Column[Optional[datetime.datetime]] - journalist_id = Column(Integer, ForeignKey('journalists.id')) + journalist_id = Column(Integer, ForeignKey('journalists.id'), nullable=False) def __init__(self, journalist: Journalist) -> None: self.journalist = journalist @@ -853,7 +903,7 @@ class RevokedToken(db.Model): __tablename__ = 'revoked_tokens' id = Column(Integer, primary_key=True) - journalist_id = Column(Integer, ForeignKey('journalists.id')) + journalist_id = Column(Integer, ForeignKey('journalists.id'), nullable=False) token = db.Column(db.Text, nullable=False, unique=True) diff --git a/securedrop/tests/migrations/migration_2e24fc7536e8.py b/securedrop/tests/migrations/migration_2e24fc7536e8.py new file mode 100644 index 00000000000..3599982d221 --- /dev/null +++ b/securedrop/tests/migrations/migration_2e24fc7536e8.py @@ -0,0 +1,106 @@ +import uuid + +from sqlalchemy import text + +from db import db +from journalist_app import create_app +from .helpers import random_datetime + + +class UpgradeTester: + """Insert a Reply, SeenReply and JournalistLoginAttempt with journalist_id=NULL. + Verify that the first two are reassociated to the "Deleted" user, while the last + is deleted outright. + """ + + def __init__(self, config): + """This function MUST accept an argument named `config`. + You will likely want to save a reference to the config in your + class, so you can access the database later. + """ + self.config = config + self.app = create_app(config) + + def load_data(self): + """This function loads data into the database and filesystem. It is + executed before the upgrade. + """ + with self.app.app_context(): + params = { + 'uuid': str(uuid.uuid4()), + 'journalist_id': None, + 'source_id': 0, + 'filename': 'dummy.txt', + 'size': 1, + 'checksum': '', + 'deleted_by_source': False, + } + sql = """\ + INSERT INTO replies (uuid, journalist_id, source_id, filename, + size, checksum, deleted_by_source) + VALUES (:uuid, :journalist_id, :source_id, :filename, + :size, :checksum, :deleted_by_source);""" + db.engine.execute(text(sql), **params) + # Insert a SeenReply corresponding to the just-inserted reply + db.engine.execute(text( + """\ + INSERT INTO seen_replies (reply_id, journalist_id) + VALUES (1, NULL); + """ + )) + # Insert a JournalistLoginAttempt + db.engine.execute(text( + """\ + INSERT INTO journalist_login_attempt (timestamp, journalist_id) + VALUES (:timestamp, NULL) + """ + ), timestamp=random_datetime(nullable=False)) + + def check_upgrade(self): + """This function is run after the upgrade and verifies the state + of the database or filesystem. It MUST raise an exception if the + check fails. + """ + with self.app.app_context(): + deleted_id = db.engine.execute( + 'SELECT id FROM journalists WHERE username="deleted"' + ).first()[0] + replies = db.engine.execute( + text('SELECT journalist_id FROM replies')).fetchall() + assert len(replies) == 1 + # And the journalist_id matches our "deleted" one + assert replies[0][0] == deleted_id + seen_replies = db.engine.execute( + text('SELECT journalist_id FROM seen_replies')).fetchall() + assert len(seen_replies) == 1 + # And the journalist_id matches our "deleted" one + assert seen_replies[0][0] == deleted_id + login_attempts = db.engine.execute( + text('SELECT * FROM journalist_login_attempt')).fetchall() + # The NULL login attempt was deleted outright + assert login_attempts == [] + + +class DowngradeTester: + """Downgrading only makes fields nullable again, which is a + non-destructive and safe operation""" + + def __init__(self, config): + """This function MUST accept an argument named `config`. + You will likely want to save a reference to the config in your + class, so you can access the database later. + """ + self.config = config + + def load_data(self): + """This function loads data into the database and filesystem. It is + executed before the downgrade. + """ + pass + + def check_downgrade(self): + """This function is run after the downgrade and verifies the state + of the database or filesystem. It MUST raise an exception if the + check fails. + """ + pass diff --git a/securedrop/tests/test_journalist.py b/securedrop/tests/test_journalist.py index 628d723cee9..6d234a60ecc 100644 --- a/securedrop/tests/test_journalist.py +++ b/securedrop/tests/test_journalist.py @@ -28,9 +28,11 @@ import models from db import db from models import ( + DeletedJournalist, InvalidPasswordLength, InstanceConfig, Journalist, + JournalistLoginAttempt, Reply, SeenFile, SeenMessage, @@ -3371,3 +3373,50 @@ def test_app_error_handlers_defined(journalist_app): for status_code in [400, 401, 403, 404, 500]: # This will raise KeyError if an app-wide error handler is not defined assert journalist_app.error_handler_spec[None][status_code] + + +def test_deleted_journalist_creation(journalist_app): + """test creating a DeletedJournalist object works""" + deleted = DeletedJournalist() + assert deleted.username == 'deleted' + assert deleted.passphrase_hash is None + db.session.add(deleted) + db.session.commit() + # Can be queried as a normal Journalist object + found = Journalist.query.filter_by(username='deleted').one() + assert found.username == 'deleted' + assert deleted.uuid == found.uuid + + +def test_lazy_deleted_journalist_creation(journalist_app): + """test lazy creation of DeletedJournalist works""" + not_found = Journalist.query.filter_by(username='deleted').one_or_none() + assert not_found is None, "deleted journalist doesn't exist yet" + deleted = DeletedJournalist.get() + db.session.commit() + # Can be found as a normal Journalist object + found = Journalist.query.filter_by(username='deleted').one() + assert deleted.uuid == found.uuid + deleted2 = DeletedJournalist.get() + assert deleted.uuid == deleted2.uuid, "same DeletedJournalist returned" + + +def test_journalist_deletion(journalist_app): + """test deleting a journalist and see data reassociated to "deleted" journalist""" + # Create a journalist that's seen two replies and has a login attempt + source, _ = utils.db_helper.init_source() + journalist, _ = utils.db_helper.init_journalist() + utils.db_helper.reply(journalist, source, 2) + db.session.add(JournalistLoginAttempt(journalist)) + db.session.commit() + # Only one login attempt in the table + assert len(JournalistLoginAttempt.query.all()) == 1 + # Delete the journalist + journalist.delete() + db.session.commit() + # Verify the "deleted" journalist has 2 associated rows of both types + deleted = DeletedJournalist.get() + assert len(Reply.query.filter_by(journalist_id=deleted.id).all()) == 2 + assert len(SeenReply.query.filter_by(journalist_id=deleted.id).all()) == 2 + # And there are no login attempts + assert JournalistLoginAttempt.query.all() == [] diff --git a/securedrop/tests/utils/db_helper.py b/securedrop/tests/utils/db_helper.py index 67349b25a9c..ab147211c93 100644 --- a/securedrop/tests/utils/db_helper.py +++ b/securedrop/tests/utils/db_helper.py @@ -51,7 +51,7 @@ def delete_journalist(journalist): :returns: None """ - db.session.delete(journalist) + journalist.delete() db.session.commit()