From 89c40db86b4a39bf9af8c89dc609bc4c8aab6421 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Fri, 30 Dec 2022 17:36:36 +0000 Subject: [PATCH 01/10] Remove special-case method for new memberships only, use more generic method --- synapse/handlers/user_directory.py | 39 +++++++++++++----------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 3610b6bf785e..5197dbe275c1 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -357,28 +357,11 @@ async def _handle_room_membership_event( # so, ensure we have a directory entry for them. (For local users, # the rest of the application calls `handle_local_profile_change`.) if is_remote: - await self._upsert_directory_entry_for_remote_user(state_key, event_id) + await self._handle_possible_remote_profile_change( + state_key, room_id, None, event_id + ) await self._track_user_joined_room(room_id, state_key) - async def _upsert_directory_entry_for_remote_user( - self, user_id: str, event_id: str - ) -> None: - """A remote user has just joined a room. Ensure they have an entry in - the user directory. The caller is responsible for making sure they're - remote. - """ - event = await self.store.get_event(event_id, allow_none=True) - # It isn't expected for this event to not exist, but we - # don't want the entire background process to break. - if event is None: - return - - logger.debug("Adding new user to dir, %r", user_id) - - await self.store.update_profile_in_user_dir( - user_id, event.content.get("displayname"), event.content.get("avatar_url") - ) - async def _track_user_joined_room(self, room_id: str, joining_user_id: str) -> None: """Someone's just joined a room. Update `users_in_public_rooms` or `users_who_share_private_rooms` as appropriate. @@ -466,9 +449,15 @@ async def _handle_possible_remote_profile_change( database if there are. This is intended for remote users only. The caller is responsible for checking that the given user is remote. """ - if not prev_event_id or not event_id: + + if not event_id: return + if not prev_event_id: + # If we don't have an older event to fall back on, just fetch the same + # event itself. + prev_event_id = event_id + prev_event = await self.store.get_event(prev_event_id, allow_none=True) event = await self.store.get_event(event_id, allow_none=True) @@ -490,5 +479,11 @@ async def _handle_possible_remote_profile_change( if not isinstance(new_avatar, str): new_avatar = prev_avatar - if prev_name != new_name or prev_avatar != new_avatar: + if ( + prev_name != new_name + or prev_avatar != new_avatar + or prev_event_id == event_id + ): + # Only update if something has changed, or we didn't have a previous event + # in the first place. await self.store.update_profile_in_user_dir(user_id, new_name, new_avatar) From 1198bee179f848265b58bcbf56482a675881279b Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Fri, 30 Dec 2022 12:18:37 +0000 Subject: [PATCH 02/10] Only collect profiles from state events in public rooms --- synapse/handlers/user_directory.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 5197dbe275c1..967da309f919 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -467,6 +467,16 @@ async def _handle_possible_remote_profile_change( if event.membership != Membership.JOIN: return + is_public = await self.store.is_room_world_readable_or_publicly_joinable( + room_id + ) + if not is_public: + # Don't collect user profiles from private rooms as they are not guaranteed + # to be the same as the user's global profile. + # TODO For correctness, need to fetch the user's real profile and store that + # to the directory. + return + prev_name = prev_event.content.get("displayname") new_name = event.content.get("displayname") # If the new name is an unexpected form, do not update the directory. From 3257912d2dc3fe0b754553f816bc84ea3fc2952e Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Fri, 30 Dec 2022 13:37:37 +0000 Subject: [PATCH 03/10] Add a table to track stale remote user profiles --- .../30_user_directory_stale_remote_users.sql | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 synapse/storage/schema/main/delta/73/30_user_directory_stale_remote_users.sql diff --git a/synapse/storage/schema/main/delta/73/30_user_directory_stale_remote_users.sql b/synapse/storage/schema/main/delta/73/30_user_directory_stale_remote_users.sql new file mode 100644 index 000000000000..dcb38f3d7b68 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/30_user_directory_stale_remote_users.sql @@ -0,0 +1,39 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Table containing a list of remote users whose profiles may have changed +-- since their last update in the user directory. +CREATE TABLE user_directory_stale_remote_users ( + -- The User ID of the remote user whose profile may be stale. + user_id TEXT NOT NULL PRIMARY KEY, + + -- The server name of the user. + user_server_name TEXT NOT NULL, + + -- The timestamp (in ms) after which we should next try to request the user's + -- latest profile. + next_try_at_ts BIGINT NOT NULL, + + -- The number of retries so far. + -- 0 means we have not yet attempted to refresh the profile. + -- Used for calculating exponential backoff. + retry_counter INTEGER NOT NULL +); + +-- Create an index so we can easily query upcoming servers to try. +CREATE INDEX user_directory_stale_remote_users_next_try_idx ON user_directory_stale_remote_users(next_try_at_ts, user_server_name); + +-- Create an index so we can easily query upcoming users to try for a particular server. +CREATE INDEX user_directory_stale_remote_users_next_try_by_server_idx ON user_directory_stale_remote_users(user_server_name, next_try_at_ts); From 331969cd338f91121f5679967d7b14adb5c10b3f Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Fri, 30 Dec 2022 13:44:21 +0000 Subject: [PATCH 04/10] Add store methods to set and delete rows in this new table --- .../storage/databases/main/user_directory.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 14ef5b040db1..f01653602a59 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -53,6 +53,7 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import ( JsonDict, + UserID, UserProfile, get_domain_from_id, get_localpart_from_id, @@ -473,11 +474,42 @@ async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> boo return False + async def set_remote_user_profile_in_user_dir_stale( + self, user_id: str, next_try_at_ms: int, retry_counter: int + ) -> None: + """ + Marks a remote user as having a possibly-stale user directory profile. + + Args: + user_id: the remote user who may have a stale profile on this server. + next_try_at_ms: timestamp in ms after which the user directory profile can be + refreshed. + retry_counter: number of failures in refreshing the profile so far. Used for + exponential backoff calculations. + """ + assert not self.hs.is_mine_id( + user_id + ), "Can't mark a local user as a stale remote user." + + server_name = UserID.from_string(user_id).domain + + await self.db_pool.simple_upsert( + table="user_directory_stale_remote_users", + keyvalues={"user_id": user_id}, + values={ + "next_try_at_ts": next_try_at_ms, + "retry_counter": retry_counter, + "user_server_name": server_name, + }, + desc="set_remote_user_profile_in_user_dir_stale", + ) + async def update_profile_in_user_dir( self, user_id: str, display_name: Optional[str], avatar_url: Optional[str] ) -> None: """ Update or add a user's profile in the user directory. + If the user is remote, the profile will be marked as not stale. """ # If the display name or avatar URL are unexpected types, replace with None. display_name = non_null_str_or_none(display_name) @@ -491,6 +523,14 @@ def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None: values={"display_name": display_name, "avatar_url": avatar_url}, ) + if not self.hs.is_mine_id(user_id): + # Remote users: Make sure the profile is not marked as stale anymore. + self.db_pool.simple_delete_txn( + txn, + table="user_directory_stale_remote_users", + keyvalues={"user_id": user_id}, + ) + if isinstance(self.database_engine, PostgresEngine): # We weight the localpart most highly, then display name and finally # server name From ab3b191c70ac99bee1e630f2b30427aeca80e102 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Fri, 30 Dec 2022 13:47:47 +0000 Subject: [PATCH 05/10] Mark remote profiles as stale when a member state event comes in to a private room --- synapse/handlers/user_directory.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 967da309f919..d17adc19f42f 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -28,6 +28,11 @@ logger = logging.getLogger(__name__) +# Don't refresh a stale user directory entry, using a Federation /profile request, +# for 60 seconds. This gives time for other state events to arrive (which will +# then be coalesced such that only one /profile request is made). +USER_DIRECTORY_STALE_REFRESH_TIME_MS = 60 * 1000 + class UserDirectoryHandler(StateDeltasHandler): """Handles queries and updates for the user_directory. @@ -473,8 +478,12 @@ async def _handle_possible_remote_profile_change( if not is_public: # Don't collect user profiles from private rooms as they are not guaranteed # to be the same as the user's global profile. - # TODO For correctness, need to fetch the user's real profile and store that - # to the directory. + now_ts = self.clock.time_msec() + await self.store.set_remote_user_profile_in_user_dir_stale( + user_id, + next_try_at_ms=now_ts + USER_DIRECTORY_STALE_REFRESH_TIME_MS, + retry_counter=0, + ) return prev_name = prev_event.content.get("displayname") From 327bcef6392ce03ef7d6bf6bbf1e810965ce8348 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Fri, 30 Dec 2022 18:23:21 +0000 Subject: [PATCH 06/10] Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/14755.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14755.bugfix diff --git a/changelog.d/14755.bugfix b/changelog.d/14755.bugfix new file mode 100644 index 000000000000..12f979e9d041 --- /dev/null +++ b/changelog.d/14755.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug in which the user directory would assume any remote membership state events represent a profile change. \ No newline at end of file From 5a543d6be593b88a5deb7c822a8f6997c6ec8003 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Mon, 13 Feb 2023 18:19:50 +0000 Subject: [PATCH 07/10] Simplify by removing Optionality of `event_id` --- synapse/handlers/user_directory.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index d17adc19f42f..0f2eb7c3d4ff 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -448,16 +448,13 @@ async def _handle_possible_remote_profile_change( user_id: str, room_id: str, prev_event_id: Optional[str], - event_id: Optional[str], + event_id: str, ) -> None: """Check member event changes for any profile changes and update the database if there are. This is intended for remote users only. The caller is responsible for checking that the given user is remote. """ - if not event_id: - return - if not prev_event_id: # If we don't have an older event to fall back on, just fetch the same # event itself. From 79669730fe7de7d40e9ad26dd470dc9874be7164 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Mon, 13 Feb 2023 18:24:32 +0000 Subject: [PATCH 08/10] Replace names and avatars with None if they're set to dodgy things I think this makes more sense anyway. --- synapse/handlers/user_directory.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 0f2eb7c3d4ff..bd19dccfe6f2 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -485,15 +485,15 @@ async def _handle_possible_remote_profile_change( prev_name = prev_event.content.get("displayname") new_name = event.content.get("displayname") - # If the new name is an unexpected form, do not update the directory. + # If the new name is an unexpected form, replace with None. if not isinstance(new_name, str): - new_name = prev_name + new_name = None prev_avatar = prev_event.content.get("avatar_url") new_avatar = event.content.get("avatar_url") - # If the new avatar is an unexpected form, do not update the directory. + # If the new avatar is an unexpected form, replace with None. if not isinstance(new_avatar, str): - new_avatar = prev_avatar + new_avatar = None if ( prev_name != new_name From 2e52bf94bc8e698df969e9e2e510810fa186f501 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Tue, 14 Feb 2023 10:07:30 +0000 Subject: [PATCH 09/10] Move schema delta to 74 (I missed the boat?) --- .../01_user_directory_stale_remote_users.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename synapse/storage/schema/main/delta/{73/30_user_directory_stale_remote_users.sql => 74/01_user_directory_stale_remote_users.sql} (100%) diff --git a/synapse/storage/schema/main/delta/73/30_user_directory_stale_remote_users.sql b/synapse/storage/schema/main/delta/74/01_user_directory_stale_remote_users.sql similarity index 100% rename from synapse/storage/schema/main/delta/73/30_user_directory_stale_remote_users.sql rename to synapse/storage/schema/main/delta/74/01_user_directory_stale_remote_users.sql From 3280675804fb7bda0def27861e022549e6faf065 Mon Sep 17 00:00:00 2001 From: "Olivier Wilkinson (reivilibre)" Date: Wed, 15 Feb 2023 15:01:50 +0000 Subject: [PATCH 10/10] Turns out these can be None after all --- synapse/handlers/user_directory.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index bd19dccfe6f2..0815be79fa54 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -205,8 +205,8 @@ async def _handle_deltas(self, deltas: List[Dict[str, Any]]) -> None: typ = delta["type"] state_key = delta["state_key"] room_id = delta["room_id"] - event_id = delta["event_id"] - prev_event_id = delta["prev_event_id"] + event_id: Optional[str] = delta["event_id"] + prev_event_id: Optional[str] = delta["prev_event_id"] logger.debug("Handling: %r %r, %s", typ, state_key, event_id) @@ -302,8 +302,8 @@ async def _handle_room_publicity_change( async def _handle_room_membership_event( self, room_id: str, - prev_event_id: str, - event_id: str, + prev_event_id: Optional[str], + event_id: Optional[str], state_key: str, ) -> None: """Process a single room membershp event. @@ -353,7 +353,8 @@ async def _handle_room_membership_event( # Handle any profile changes for remote users. # (For local users the rest of the application calls # `handle_local_profile_change`.) - if is_remote: + # Only process if there is an event_id. + if is_remote and event_id is not None: await self._handle_possible_remote_profile_change( state_key, room_id, prev_event_id, event_id ) @@ -361,7 +362,8 @@ async def _handle_room_membership_event( # This may be the first time we've seen a remote user. If # so, ensure we have a directory entry for them. (For local users, # the rest of the application calls `handle_local_profile_change`.) - if is_remote: + # Only process if there is an event_id. + if is_remote and event_id is not None: await self._handle_possible_remote_profile_change( state_key, room_id, None, event_id )