Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Remove functionality associated with unused historical stats tables #9721

Merged
merged 6 commits into from
Jul 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/9721.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove functionality associated with the unused `room_stats_historical` and `user_stats_historical` tables. Contributed by @xmunoz.
50 changes: 5 additions & 45 deletions docs/room_and_user_statistics.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Room and User Statistics
========================

Synapse maintains room and user statistics (as well as a cache of room state),
in various tables. These can be used for administrative purposes but are also
used when generating the public room directory.
Synapse maintains room and user statistics in various tables. These can be used
for administrative purposes but are also used when generating the public room
directory.


# Synapse Developer Documentation
Expand All @@ -15,48 +15,8 @@ used when generating the public room directory.
* **subject**: Something we are tracking stats about – currently a room or user.
* **current row**: An entry for a subject in the appropriate current statistics
table. Each subject can have only one.
* **historical row**: An entry for a subject in the appropriate historical
statistics table. Each subject can have any number of these.

### Overview

Stats are maintained as time series. There are two kinds of column:

* absolute columns – where the value is correct for the time given by `end_ts`
in the stats row. (Imagine a line graph for these values)
* If you are familiar with Prometheus, these can also be thought of as 'gauges'.
* per-slice columns – where the value is the number of occurrences that fell
within the time slice given by `(end_ts − bucket_size)…end_ts`
or `start_ts…end_ts`. (Imagine a histogram for these values.)

Stats are maintained in two tables (for each type): current and historical.

Current stats correspond to the present values. Each subject can only have one
entry.

Historical stats correspond to values in the past. Subjects may have multiple
entries.

## Concepts around the management of stats

### Current rows

Current rows contain the most up-to-date statistics for a subject (a room or user).
They contain only absolute columns.

### Historical rows

Historical rows can always be considered to be valid for the time slice and
end time specified.

* historical rows will not exist for every time slice – they will be omitted
if there were no changes. In this case, the following assumptions can be
made to interpolate/recreate missing rows:
- absolute fields have the same values as in the preceding row
- per-slice fields are zero (`0`)
* historical rows will not be retained forever – rows older than a configurable
time will be purged.

#### Purge

The purging of historical rows is not yet implemented.
Stats correspond to the present values. Current rows contain the most up-to-date
statistics for a subject (a room or user). Each subject can have only one entry.
5 changes: 0 additions & 5 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2652,11 +2652,6 @@ stats:
#
#enabled: false

# The size of each timeslice in the room_stats_historical and
# user_stats_historical tables, as a time period. Defaults to "1d".
#
#bucket_size: 1h


# Server Notices room configuration
#
Expand Down
9 changes: 0 additions & 9 deletions synapse/config/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,9 @@ class StatsConfig(Config):

def read_config(self, config, **kwargs):
self.stats_enabled = True
self.stats_bucket_size = 86400 * 1000
stats_config = config.get("stats", None)
if stats_config:
self.stats_enabled = stats_config.get("enabled", self.stats_enabled)
self.stats_bucket_size = self.parse_duration(
stats_config.get("bucket_size", "1d")
)
if not self.stats_enabled:
logger.warning(ROOM_STATS_DISABLED_WARN)

Expand All @@ -59,9 +55,4 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs):
# correctly.
#
#enabled: false

# The size of each timeslice in the room_stats_historical and
# user_stats_historical tables, as a time period. Defaults to "1d".
#
#bucket_size: 1h
"""
27 changes: 0 additions & 27 deletions synapse/handlers/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def __init__(self, hs: "HomeServer"):
self.clock = hs.get_clock()
self.notifier = hs.get_notifier()
self.is_mine_id = hs.is_mine_id
self.stats_bucket_size = hs.config.stats_bucket_size

self.stats_enabled = hs.config.stats_enabled

Expand Down Expand Up @@ -106,20 +105,6 @@ async def _unsafe_process(self) -> None:
room_deltas = {}
user_deltas = {}

# Then count deltas for total_events and total_event_bytes.
(
room_count,
user_count,
) = await self.store.get_changes_room_total_events_and_bytes(
self.pos, max_pos
)

for room_id, fields in room_count.items():
room_deltas.setdefault(room_id, Counter()).update(fields)

for user_id, fields in user_count.items():
user_deltas.setdefault(user_id, Counter()).update(fields)

logger.debug("room_deltas: %s", room_deltas)
logger.debug("user_deltas: %s", user_deltas)

Expand Down Expand Up @@ -181,12 +166,10 @@ async def _handle_deltas(

event_content = {} # type: JsonDict

sender = None
if event_id is not None:
event = await self.store.get_event(event_id, allow_none=True)
if event:
event_content = event.content or {}
sender = event.sender

# All the values in this dict are deltas (RELATIVE changes)
room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter())
Expand Down Expand Up @@ -244,12 +227,6 @@ async def _handle_deltas(
room_stats_delta["joined_members"] += 1
elif membership == Membership.INVITE:
room_stats_delta["invited_members"] += 1

if sender and self.is_mine_id(sender):
user_to_stats_deltas.setdefault(sender, Counter())[
"invites_sent"
] += 1

elif membership == Membership.LEAVE:
room_stats_delta["left_members"] += 1
elif membership == Membership.BAN:
Expand Down Expand Up @@ -279,10 +256,6 @@ async def _handle_deltas(
room_state["is_federatable"] = (
event_content.get("m.federate", True) is True
)
if sender and self.is_mine_id(sender):
user_to_stats_deltas.setdefault(sender, Counter())[
"rooms_created"
] += 1
elif typ == EventTypes.JoinRules:
room_state["join_rules"] = event_content.get("join_rule")
elif typ == EventTypes.RoomHistoryVisibility:
Expand Down
1 change: 0 additions & 1 deletion synapse/storage/databases/main/purge_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,6 @@ def _purge_room_txn(self, txn, room_id: str) -> List[int]:
"room_memberships",
"room_stats_state",
"room_stats_current",
"room_stats_historical",
"room_stats_earliest_token",
"rooms",
"stream_ordering_to_exterm",
Expand Down
Loading