Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Merge pull request #6340 from matrix-org/babolivier/pagination_query
Browse files Browse the repository at this point in the history
Fix the SQL SELECT query in _paginate_room_events_txn
  • Loading branch information
babolivier authored Nov 8, 2019
2 parents f713c01 + b16fa43 commit 963ffb6
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 8 deletions.
1 change: 1 addition & 0 deletions changelog.d/6340.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)).
40 changes: 32 additions & 8 deletions synapse/storage/data_stores/main/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,14 +871,38 @@ def _paginate_room_events_txn(

args.append(int(limit))

sql = (
"SELECT DISTINCT event_id, topological_ordering, stream_ordering"
" FROM events"
" LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)"
" WHERE outlier = ? AND room_id = ? AND %(bounds)s"
" ORDER BY topological_ordering %(order)s,"
" stream_ordering %(order)s LIMIT ?"
) % {"bounds": bounds, "order": order}
select_keywords = "SELECT"
join_clause = ""
if event_filter and event_filter.labels:
# If we're not filtering on a label, then joining on event_labels will
# return as many rows for a single event as the number of labels it has. To
# avoid this, only join if we're filtering on at least one label.
join_clause = """
LEFT JOIN event_labels
USING (event_id, room_id, topological_ordering)
"""
if len(event_filter.labels) > 1:
# Using DISTINCT in this SELECT query is quite expensive, because it
# requires the engine to sort on the entire (not limited) result set,
# i.e. the entire events table. We only need to use it when we're
# filtering on more than one label, because that's the only scenario
# in which we can possibly get the same event ID multiple times in
# the results.
select_keywords += "DISTINCT"

sql = """
%(select_keywords)s event_id, topological_ordering, stream_ordering
FROM events
%(join_clause)s
WHERE outlier = ? AND room_id = ? AND %(bounds)s
ORDER BY topological_ordering %(order)s,
stream_ordering %(order)s LIMIT ?
""" % {
"select_keywords": select_keywords,
"join_clause": join_clause,
"bounds": bounds,
"order": order,
}

txn.execute(sql, args)

Expand Down

0 comments on commit 963ffb6

Please sign in to comment.