-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Cache user IDs instead of profile objects #13573
Changes from 4 commits
1a1738f
38aece4
627017e
d2c07a2
e49e716
0c54e9a
63f4380
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Cache user IDs instead of profiles to reduce cache memory usage (renames cache `_get_joined_profile_from_event_id` to `_get_user_id_from_membership_event_id`). Contributed by Nick @ Beeper (@fizzadar). | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -835,9 +835,9 @@ async def get_mutual_rooms_between_users( | |
|
||
return shared_room_ids or frozenset() | ||
|
||
async def get_joined_users_from_state( | ||
async def get_joined_user_ids_from_state( | ||
self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" | ||
) -> Dict[str, ProfileInfo]: | ||
) -> Set[str]: | ||
state_group: Union[object, int] = state_entry.state_group | ||
if not state_group: | ||
# If state_group is None it means it has yet to be assigned a | ||
|
@@ -848,25 +848,25 @@ async def get_joined_users_from_state( | |
|
||
assert state_group is not None | ||
with Measure(self._clock, "get_joined_users_from_state"): | ||
return await self._get_joined_users_from_context( | ||
return await self._get_joined_user_ids_from_context( | ||
room_id, state_group, state, context=state_entry | ||
) | ||
|
||
@cached(num_args=2, iterable=True, max_entries=100000) | ||
async def _get_joined_users_from_context( | ||
async def _get_joined_user_ids_from_context( | ||
self, | ||
room_id: str, | ||
state_group: Union[object, int], | ||
current_state_ids: StateMap[str], | ||
event: Optional[EventBase] = None, | ||
context: Optional["_StateCacheEntry"] = None, | ||
) -> Dict[str, ProfileInfo]: | ||
) -> Set[str]: | ||
# We don't use `state_group`, it's there so that we can cache based | ||
# on it. However, it's important that it's never None, since two current_states | ||
# with a state_group of None are likely to be different. | ||
assert state_group is not None | ||
|
||
users_in_room = {} | ||
users_in_room = set() | ||
member_event_ids = [ | ||
e_id | ||
for key, e_id in current_state_ids.items() | ||
|
@@ -879,19 +879,19 @@ async def _get_joined_users_from_context( | |
# If we do then we can reuse that result and simply update it with | ||
# any membership changes in `delta_ids` | ||
if context.prev_group and context.delta_ids: | ||
prev_res = self._get_joined_users_from_context.cache.get_immediate( | ||
prev_res = self._get_joined_user_ids_from_context.cache.get_immediate( | ||
(room_id, context.prev_group), None | ||
) | ||
if prev_res and isinstance(prev_res, dict): | ||
users_in_room = dict(prev_res) | ||
if prev_res and isinstance(prev_res, set): | ||
users_in_room = prev_res | ||
member_event_ids = [ | ||
e_id | ||
for key, e_id in context.delta_ids.items() | ||
if key[0] == EventTypes.Member | ||
] | ||
for etype, state_key in context.delta_ids: | ||
if etype == EventTypes.Member: | ||
users_in_room.pop(state_key, None) | ||
users_in_room.discard(state_key) | ||
|
||
# We check if we have any of the member event ids in the event cache | ||
# before we ask the DB | ||
|
@@ -908,42 +908,36 @@ async def _get_joined_users_from_context( | |
ev_entry = event_map.get(event_id) | ||
if ev_entry and not ev_entry.event.rejected_reason: | ||
if ev_entry.event.membership == Membership.JOIN: | ||
users_in_room[ev_entry.event.state_key] = ProfileInfo( | ||
display_name=ev_entry.event.content.get("displayname", None), | ||
avatar_url=ev_entry.event.content.get("avatar_url", None), | ||
) | ||
users_in_room.add(ev_entry.event.state_key) | ||
else: | ||
missing_member_event_ids.append(event_id) | ||
|
||
if missing_member_event_ids: | ||
event_to_memberships = await self._get_joined_profiles_from_event_ids( | ||
event_to_memberships = await self._get_user_ids_from_membership_event_ids( | ||
missing_member_event_ids | ||
) | ||
users_in_room.update(row for row in event_to_memberships.values() if row) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, the removal of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, no, this is actually because we're filtering out non-joins, which @cachedList will return as None There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed by #13600 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops, thank you for fixing that so quick 🙏! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was excited to test the change on my server, so deployed it and saw everything light up in red :D I am a bit scared that none of the tests spotted this.... |
||
users_in_room.update(event_to_memberships.values()) | ||
|
||
if event is not None and event.type == EventTypes.Member: | ||
if event.membership == Membership.JOIN: | ||
if event.event_id in member_event_ids: | ||
users_in_room[event.state_key] = ProfileInfo( | ||
display_name=event.content.get("displayname", None), | ||
avatar_url=event.content.get("avatar_url", None), | ||
) | ||
users_in_room.add(event.state_key) | ||
|
||
return users_in_room | ||
|
||
@cached(max_entries=10000) | ||
def _get_joined_profile_from_event_id( | ||
def _get_user_id_from_membership_event_id( | ||
self, event_id: str | ||
) -> Optional[Tuple[str, ProfileInfo]]: | ||
raise NotImplementedError() | ||
|
||
@cachedList( | ||
cached_method_name="_get_joined_profile_from_event_id", | ||
cached_method_name="_get_user_id_from_membership_event_id", | ||
list_name="event_ids", | ||
) | ||
async def _get_joined_profiles_from_event_ids( | ||
async def _get_user_ids_from_membership_event_ids( | ||
self, event_ids: Iterable[str] | ||
) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]: | ||
) -> Dict[str, str]: | ||
"""For given set of member event_ids check if they point to a join | ||
event and if so return the associated user and profile info. | ||
|
||
|
@@ -958,21 +952,13 @@ async def _get_joined_profiles_from_event_ids( | |
table="room_memberships", | ||
column="event_id", | ||
iterable=event_ids, | ||
retcols=("user_id", "display_name", "avatar_url", "event_id"), | ||
retcols=("user_id", "event_id"), | ||
keyvalues={"membership": Membership.JOIN}, | ||
batch_size=1000, | ||
desc="_get_joined_profiles_from_event_ids", | ||
desc="_get_user_ids_from_membership_event_ids", | ||
) | ||
|
||
return { | ||
row["event_id"]: ( | ||
row["user_id"], | ||
ProfileInfo( | ||
avatar_url=row["avatar_url"], display_name=row["display_name"] | ||
), | ||
) | ||
for row in rows | ||
} | ||
return {row["event_id"]: row["user_id"] for row in rows} | ||
|
||
@cached(max_entries=10000) | ||
async def is_host_joined(self, room_id: str, host: str) -> bool: | ||
|
@@ -1131,12 +1117,12 @@ async def _get_joined_hosts( | |
else: | ||
# The cache doesn't match the state group or prev state group, | ||
# so we calculate the result from first principles. | ||
joined_users = await self.get_joined_users_from_state( | ||
joined_user_ids = await self.get_joined_user_ids_from_state( | ||
room_id, state, state_entry | ||
) | ||
|
||
cache.hosts_to_joined_users = {} | ||
for user_id in joined_users: | ||
for user_id in joined_user_ids: | ||
host = intern_string(get_domain_from_id(user_id)) | ||
cache.hosts_to_joined_users.setdefault(host, set()).add(user_id) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm really cautious about renaming the caches if I'm honest, especially for something like this. I wonder if we should explicitly keep the old name, by adding a
name
param to@cached
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah totally makes sense, esp. if anyone has tweaked the cache factor (like us). Added in e49e716 & 0c54e9a.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Speedy!