Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Simplify reap_monthly_active_users #7558

Merged
merged 2 commits into from
May 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/7558.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Simplify `reap_monthly_active_users`.
100 changes: 41 additions & 59 deletions synapse/storage/data_stores/main/monthly_active_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from twisted.internet import defer

from synapse.storage._base import SQLBaseStore
from synapse.storage.database import Database
from synapse.storage.database import Database, make_in_list_sql_clause
from synapse.util.caches.descriptors import cached

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -187,75 +187,57 @@ def _reap_users(txn, reserved_users):
"""

thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
query_args = [thirty_days_ago]
base_sql = "DELETE FROM monthly_active_users WHERE timestamp < ?"

# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
# when len(reserved_users) == 0. Works fine on sqlite.
if len(reserved_users) > 0:
# questionmarks is a hack to overcome sqlite not supporting
# tuples in 'WHERE IN %s'
question_marks = ",".join("?" * len(reserved_users))

query_args.extend(reserved_users)
sql = base_sql + " AND user_id NOT IN ({})".format(question_marks)
else:
sql = base_sql

txn.execute(sql, query_args)
in_clause, in_clause_args = make_in_list_sql_clause(
self.database_engine, "user_id", reserved_users
)

txn.execute(
"DELETE FROM monthly_active_users WHERE timestamp < ? AND NOT %s"
% (in_clause,),
[thirty_days_ago] + in_clause_args,
)

if self._limit_usage_by_mau:
# If MAU user count still exceeds the MAU threshold, then delete on
# a least recently active basis.
# Note it is not possible to write this query using OFFSET due to
# incompatibilities in how sqlite and postgres support the feature.
# sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present
# While Postgres does not require 'LIMIT', but also does not support
# Sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present,
# while Postgres does not require 'LIMIT', but also does not support
# negative LIMIT values. So there is no way to write it that both can
# support
if len(reserved_users) == 0:
sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
ORDER BY timestamp DESC
LIMIT ?
)
"""
txn.execute(sql, ((self._max_mau_value),))
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres
# when len(reserved_users) == 0. Works fine on sqlite.
else:
# Must be >= 0 for postgres
num_of_non_reserved_users_to_remove = max(
self._max_mau_value - len(reserved_users), 0
)

# It is important to filter reserved users twice to guard
# against the case where the reserved user is present in the
# SELECT, meaning that a legitmate mau is deleted.
sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
WHERE user_id NOT IN ({})
ORDER BY timestamp DESC
LIMIT ?
)
AND user_id NOT IN ({})
""".format(
question_marks, question_marks
# Limit must be >= 0 for postgres
num_of_non_reserved_users_to_remove = max(
self._max_mau_value - len(reserved_users), 0
)

# It is important to filter reserved users twice to guard
# against the case where the reserved user is present in the
# SELECT, meaning that a legitimate mau is deleted.
sql = """
DELETE FROM monthly_active_users
WHERE user_id NOT IN (
SELECT user_id FROM monthly_active_users
WHERE NOT %s
ORDER BY timestamp DESC
LIMIT ?
)

query_args = [
*reserved_users,
num_of_non_reserved_users_to_remove,
*reserved_users,
]

txn.execute(sql, query_args)

# It seems poor to invalidate the whole cache, Postgres supports
AND NOT %s
""" % (
in_clause,
in_clause,
)

query_args = (
in_clause_args
+ [num_of_non_reserved_users_to_remove]
+ in_clause_args
)
txn.execute(sql, query_args)

# It seems poor to invalidate the whole cache. Postgres supports
# 'Returning' which would allow me to invalidate only the
# specific users, but sqlite has no way to do this and instead
# I would need to SELECT and the DELETE which without locking
Expand Down