This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Instrument the federation/backfill part of /messages
#13489
Merged
MadLittleMods
merged 16 commits into
develop
from
madlittlemods/instrument-federation-part-of-messages
Aug 16, 2022
Merged
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
aeaa36d
Instrument the federation/backfill part of /messages
MadLittleMods 6a389cd
Add changelog
MadLittleMods eb20203
Also explain to append
MadLittleMods e3c2e11
Make sure to re-use the tags for @tag_args
MadLittleMods caa5ee9
`Collection` for multiple iterable readable list
MadLittleMods 1f4911b
Record exception in span
MadLittleMods e564f7a
Refactor recursive code so we can wrap just the redaction part
MadLittleMods ac1b8d5
Separate tag for event length
MadLittleMods 878d9ce
Fix missing .length
MadLittleMods af7ec77
Fix typo
MadLittleMods 92e3e6a
Use while recursion
MadLittleMods 799c3d5
Refactor back to iterative function
MadLittleMods abb1383
Merge branch 'develop' into madlittlemods/instrument-federation-part-…
MadLittleMods 43eab68
Update changelog to stand on its own since it can't merge anymore
MadLittleMods 047e2bb
Merge branch 'develop' into madlittlemods/instrument-federation-part-…
MadLittleMods 1c4d8bb
Merge branch 'madlittlemods/instrument-federation-part-of-messages' o…
MadLittleMods File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Instrument the federation/backfill part of `/messages` for understandable traces in Jaeger. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,7 +59,13 @@ | |
from synapse.events.snapshot import EventContext | ||
from synapse.federation.federation_client import InvalidResponseError | ||
from synapse.logging.context import nested_logging_context | ||
from synapse.logging.opentracing import trace | ||
from synapse.logging.opentracing import ( | ||
SynapseTags, | ||
set_tag, | ||
start_active_span, | ||
tag_args, | ||
trace, | ||
) | ||
from synapse.metrics.background_process_metrics import run_as_background_process | ||
from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet | ||
from synapse.replication.http.federation import ( | ||
|
@@ -410,6 +416,7 @@ async def check_join_restrictions( | |
prev_member_event, | ||
) | ||
|
||
@trace | ||
async def process_remote_join( | ||
self, | ||
origin: str, | ||
|
@@ -715,7 +722,7 @@ async def _get_missing_events_for_pdu( | |
|
||
@trace | ||
async def _process_pulled_events( | ||
self, origin: str, events: Iterable[EventBase], backfilled: bool | ||
self, origin: str, events: Collection[EventBase], backfilled: bool | ||
) -> None: | ||
"""Process a batch of events we have pulled from a remote server | ||
|
||
|
@@ -730,6 +737,15 @@ async def _process_pulled_events( | |
backfilled: True if this is part of a historical batch of events (inhibits | ||
notification to clients, and validation of device keys.) | ||
""" | ||
set_tag( | ||
SynapseTags.FUNC_ARG_PREFIX + "event_ids", | ||
str([event.event_id for event in events]), | ||
) | ||
set_tag( | ||
SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", | ||
str(len(events)), | ||
) | ||
set_tag(SynapseTags.FUNC_ARG_PREFIX + "backfilled", str(backfilled)) | ||
logger.debug( | ||
"processing pulled backfilled=%s events=%s", | ||
backfilled, | ||
|
@@ -753,6 +769,7 @@ async def _process_pulled_events( | |
await self._process_pulled_event(origin, ev, backfilled=backfilled) | ||
|
||
@trace | ||
@tag_args | ||
async def _process_pulled_event( | ||
self, origin: str, event: EventBase, backfilled: bool | ||
) -> None: | ||
|
@@ -854,6 +871,7 @@ async def _process_pulled_event( | |
else: | ||
raise | ||
|
||
@trace | ||
async def _compute_event_context_with_maybe_missing_prevs( | ||
self, dest: str, event: EventBase | ||
) -> EventContext: | ||
|
@@ -970,6 +988,8 @@ async def _compute_event_context_with_maybe_missing_prevs( | |
event, state_ids_before_event=state_map, partial_state=partial_state | ||
) | ||
|
||
@trace | ||
@tag_args | ||
async def _get_state_ids_after_missing_prev_event( | ||
self, | ||
destination: str, | ||
|
@@ -1009,10 +1029,10 @@ async def _get_state_ids_after_missing_prev_event( | |
logger.debug("Fetching %i events from cache/store", len(desired_events)) | ||
have_events = await self._store.have_seen_events(room_id, desired_events) | ||
|
||
missing_desired_events = desired_events - have_events | ||
missing_desired_event_ids = desired_events - have_events | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some (doesn't fix all of the cases in this function) |
||
logger.debug( | ||
"We are missing %i events (got %i)", | ||
len(missing_desired_events), | ||
len(missing_desired_event_ids), | ||
len(have_events), | ||
) | ||
|
||
|
@@ -1024,13 +1044,30 @@ async def _get_state_ids_after_missing_prev_event( | |
# already have a bunch of the state events. It would be nice if the | ||
# federation api gave us a way of finding out which we actually need. | ||
|
||
missing_auth_events = set(auth_event_ids) - have_events | ||
missing_auth_events.difference_update( | ||
await self._store.have_seen_events(room_id, missing_auth_events) | ||
missing_auth_event_ids = set(auth_event_ids) - have_events | ||
missing_auth_event_ids.difference_update( | ||
await self._store.have_seen_events(room_id, missing_auth_event_ids) | ||
) | ||
logger.debug("We are also missing %i auth events", len(missing_auth_events)) | ||
logger.debug("We are also missing %i auth events", len(missing_auth_event_ids)) | ||
|
||
missing_events = missing_desired_events | missing_auth_events | ||
missing_event_ids = missing_desired_event_ids | missing_auth_event_ids | ||
|
||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "missing_auth_event_ids", | ||
str(missing_auth_event_ids), | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "missing_auth_event_ids.length", | ||
str(len(missing_auth_event_ids)), | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "missing_desired_event_ids", | ||
str(missing_desired_event_ids), | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "missing_desired_event_ids.length", | ||
str(len(missing_desired_event_ids)), | ||
) | ||
|
||
# Making an individual request for each of 1000s of events has a lot of | ||
# overhead. On the other hand, we don't really want to fetch all of the events | ||
|
@@ -1041,13 +1078,13 @@ async def _get_state_ids_after_missing_prev_event( | |
# | ||
# TODO: might it be better to have an API which lets us do an aggregate event | ||
# request | ||
if (len(missing_events) * 10) >= len(auth_event_ids) + len(state_event_ids): | ||
if (len(missing_event_ids) * 10) >= len(auth_event_ids) + len(state_event_ids): | ||
logger.debug("Requesting complete state from remote") | ||
await self._get_state_and_persist(destination, room_id, event_id) | ||
else: | ||
logger.debug("Fetching %i events from remote", len(missing_events)) | ||
logger.debug("Fetching %i events from remote", len(missing_event_ids)) | ||
await self._get_events_and_persist( | ||
destination=destination, room_id=room_id, event_ids=missing_events | ||
destination=destination, room_id=room_id, event_ids=missing_event_ids | ||
) | ||
|
||
# We now need to fill out the state map, which involves fetching the | ||
|
@@ -1104,6 +1141,14 @@ async def _get_state_ids_after_missing_prev_event( | |
event_id, | ||
failed_to_fetch, | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "failed_to_fetch", | ||
str(failed_to_fetch), | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "failed_to_fetch.length", | ||
str(len(failed_to_fetch)), | ||
) | ||
|
||
if remote_event.is_state() and remote_event.rejected_reason is None: | ||
state_map[ | ||
|
@@ -1112,6 +1157,8 @@ async def _get_state_ids_after_missing_prev_event( | |
|
||
return state_map | ||
|
||
@trace | ||
@tag_args | ||
async def _get_state_and_persist( | ||
self, destination: str, room_id: str, event_id: str | ||
) -> None: | ||
|
@@ -1133,6 +1180,7 @@ async def _get_state_and_persist( | |
destination=destination, room_id=room_id, event_ids=(event_id,) | ||
) | ||
|
||
@trace | ||
async def _process_received_pdu( | ||
self, | ||
origin: str, | ||
|
@@ -1283,6 +1331,7 @@ async def _resync_device(self, sender: str) -> None: | |
except Exception: | ||
logger.exception("Failed to resync device for %s", sender) | ||
|
||
@trace | ||
async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> None: | ||
"""Handles backfilling the insertion event when we receive a marker | ||
event that points to one. | ||
|
@@ -1414,6 +1463,8 @@ async def backfill_event_id( | |
|
||
return event_from_response | ||
|
||
@trace | ||
@tag_args | ||
async def _get_events_and_persist( | ||
self, destination: str, room_id: str, event_ids: Collection[str] | ||
) -> None: | ||
|
@@ -1459,6 +1510,7 @@ async def get_event(event_id: str) -> None: | |
logger.info("Fetched %i events of %i requested", len(events), len(event_ids)) | ||
await self._auth_and_persist_outliers(room_id, events) | ||
|
||
@trace | ||
async def _auth_and_persist_outliers( | ||
self, room_id: str, events: Iterable[EventBase] | ||
) -> None: | ||
|
@@ -1477,6 +1529,16 @@ async def _auth_and_persist_outliers( | |
""" | ||
event_map = {event.event_id: event for event in events} | ||
|
||
event_ids = event_map.keys() | ||
set_tag( | ||
SynapseTags.FUNC_ARG_PREFIX + "event_ids", | ||
str(event_ids), | ||
) | ||
set_tag( | ||
SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", | ||
str(len(event_ids)), | ||
) | ||
|
||
# filter out any events we have already seen. This might happen because | ||
# the events were eagerly pushed to us (eg, during a room join), or because | ||
# another thread has raced against us since we decided to request the event. | ||
|
@@ -1593,6 +1655,7 @@ async def prep(event: EventBase) -> None: | |
backfilled=True, | ||
) | ||
|
||
@trace | ||
async def _check_event_auth( | ||
self, origin: Optional[str], event: EventBase, context: EventContext | ||
) -> None: | ||
|
@@ -1631,6 +1694,14 @@ async def _check_event_auth( | |
claimed_auth_events = await self._load_or_fetch_auth_events_for_event( | ||
origin, event | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "claimed_auth_events", | ||
str([ev.event_id for ev in claimed_auth_events]), | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "claimed_auth_events.length", | ||
str(len(claimed_auth_events)), | ||
) | ||
|
||
# ... and check that the event passes auth at those auth events. | ||
# https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: | ||
|
@@ -1728,6 +1799,7 @@ async def _check_event_auth( | |
) | ||
context.rejected = RejectedReason.AUTH_ERROR | ||
|
||
@trace | ||
async def _maybe_kick_guest_users(self, event: EventBase) -> None: | ||
if event.type != EventTypes.GuestAccess: | ||
return | ||
|
@@ -1935,6 +2007,8 @@ async def _load_or_fetch_auth_events_for_event( | |
# instead we raise an AuthError, which will make the caller ignore it. | ||
raise AuthError(code=HTTPStatus.FORBIDDEN, msg="Auth events could not be found") | ||
|
||
@trace | ||
@tag_args | ||
async def _get_remote_auth_chain_for_event( | ||
self, destination: str, room_id: str, event_id: str | ||
) -> None: | ||
|
@@ -1963,6 +2037,7 @@ async def _get_remote_auth_chain_for_event( | |
|
||
await self._auth_and_persist_outliers(room_id, remote_auth_events) | ||
|
||
@trace | ||
async def _run_push_actions_and_persist_event( | ||
self, event: EventBase, context: EventContext, backfilled: bool = False | ||
) -> None: | ||
|
@@ -2071,8 +2146,17 @@ async def persist_events_and_notify( | |
self._message_handler.maybe_schedule_expiry(event) | ||
|
||
if not backfilled: # Never notify for backfilled events | ||
for event in events: | ||
await self._notify_persisted_event(event, max_stream_token) | ||
with start_active_span("notify_persisted_events"): | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "event_ids", | ||
str([ev.event_id for ev in events]), | ||
) | ||
set_tag( | ||
SynapseTags.RESULT_PREFIX + "event_ids.length", | ||
str(len(events)), | ||
) | ||
for event in events: | ||
await self._notify_persisted_event(event, max_stream_token) | ||
|
||
return max_stream_token.stream | ||
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could make `set_tag` smart enough to accept a `Collection` and add this extra `length` tag for us.