Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Pass back errors to the client when trying multiple federation destinations #9868

Merged
merged 9 commits into from
Apr 27, 2021
1 change: 1 addition & 0 deletions changelog.d/9868.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a long-standing bug where errors from federation did not propagate to the client.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes all look sensible, but I'm not entirely sure which errors which were previously swallowed are now propagated correctly. Do you have any examples?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well the rationale for the work is the 403 error in #9814 is now passed back to the client as a 403, instead of a 400.

Copy link
Member Author

@clokep clokep Apr 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should probably mention that this is partially tested by matrix-org/complement#99.

118 changes: 60 additions & 58 deletions synapse/federation/federation_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,28 @@ async def get_event_auth(

return signed_auth

def _is_unknown_endpoint(
self, e: HttpResponseException, synapse_error: Optional[SynapseError] = None
) -> bool:
"""
Returns true if the response was due to an endpoint being unimplemented.

Args:
e: The error response received from the remote server.
synapse_error: The above error converted to a SynapseError. This is
automatically generated if not provided.

"""
if synapse_error is None:
synapse_error = e.to_synapse_error()
# There is no good way to detect an "unknown" endpoint.
#
# Dendrite returns a 404 (with no body); synapse returns a 400
# with M_UNRECOGNISED.
return e.code == 404 or (
e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED
)

async def _try_destination_list(
self,
description: str,
Expand All @@ -468,9 +490,9 @@ async def _try_destination_list(
callback: Function to run for each server. Passed a single
argument: the server_name to try.

If the callback raises a CodeMessageException with a 300/400 code,
attempts to perform the operation stop immediately and the exception is
reraised.
If the callback raises a CodeMessageException with a 300/400 code or
an UnsupportedRoomVersionError, attempts to perform the operation
stop immediately and the exception is reraised.

Otherwise, if the callback raises an Exception the error is logged and the
next server tried. Normally the stacktrace is logged but this is
Expand All @@ -492,8 +514,7 @@ async def _try_destination_list(
continue

try:
res = await callback(destination)
return res
return await callback(destination)
except InvalidResponseError as e:
logger.warning("Failed to %s via %s: %s", description, destination, e)
except UnsupportedRoomVersionError:
Expand All @@ -502,17 +523,15 @@ async def _try_destination_list(
synapse_error = e.to_synapse_error()
failover = False

# Failover on an internal server error, or if the destination
# doesn't implemented the endpoint for some reason.
if 500 <= e.code < 600:
failover = True

elif failover_on_unknown_endpoint:
# there is no good way to detect an "unknown" endpoint. Dendrite
# returns a 404 (with no body); synapse returns a 400
# with M_UNRECOGNISED.
if e.code == 404 or (
e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED
):
failover = True
elif failover_on_unknown_endpoint and self._is_unknown_endpoint(
e, synapse_error
):
failover = True

if not failover:
raise synapse_error from e
Expand Down Expand Up @@ -570,9 +589,8 @@ async def make_membership_event(
UnsupportedRoomVersionError: if remote responds with
a room version we don't understand.

SynapseError: if the chosen remote server returns a 300/400 code.

RuntimeError: if no servers were reachable.
SynapseError: if the chosen remote server returns a 300/400 code, or
no servers successfully handle the request.
"""
valid_memberships = {Membership.JOIN, Membership.LEAVE}
if membership not in valid_memberships:
Expand Down Expand Up @@ -642,9 +660,8 @@ async def send_join(
``auth_chain``.

Raises:
SynapseError: if the chosen remote server returns a 300/400 code.

RuntimeError: if no servers were reachable.
SynapseError: if the chosen remote server returns a 300/400 code, or
no servers successfully handle the request.
"""

async def send_request(destination) -> Dict[str, Any]:
Expand Down Expand Up @@ -673,7 +690,7 @@ async def send_request(destination) -> Dict[str, Any]:
if create_event is None:
# If the state doesn't have a create event then the room is
# invalid, and it would fail auth checks anyway.
raise SynapseError(400, "No create event in state")
raise InvalidResponseError("No create event in state")

# the room version should be sane.
create_room_version = create_event.content.get(
Expand Down Expand Up @@ -746,16 +763,11 @@ async def _do_send_join(self, destination: str, pdu: EventBase) -> JsonDict:
content=pdu.get_pdu_json(time_now),
)
except HttpResponseException as e:
if e.code in [400, 404]:
err = e.to_synapse_error()

# If we receive an error response that isn't a generic error, or an
# unrecognised endpoint error, we assume that the remote understands
# the v2 invite API and this is a legitimate error.
if err.errcode not in [Codes.UNKNOWN, Codes.UNRECOGNIZED]:
raise err
else:
raise e.to_synapse_error()
# If an error is received that is due to an unrecognised endpoint,
# fallback to the v1 endpoint. Otherwise consider it a legitmate error
# and raise.
if not self._is_unknown_endpoint(e):
raise

logger.debug("Couldn't send_join with the v2 API, falling back to the v1 API")

Expand Down Expand Up @@ -802,6 +814,11 @@ async def _do_send_invite(

Returns:
The event as a dict as returned by the remote server

Raises:
SynapseError: if the remote server returns an error or if the server
only supports the v1 endpoint and a room version other than "1"
or "2" is requested.
"""
time_now = self._clock.time_msec()

Expand All @@ -817,28 +834,19 @@ async def _do_send_invite(
},
)
except HttpResponseException as e:
if e.code in [400, 404]:
err = e.to_synapse_error()

# If we receive an error response that isn't a generic error, we
# assume that the remote understands the v2 invite API and this
# is a legitimate error.
if err.errcode != Codes.UNKNOWN:
raise err

# Otherwise, we assume that the remote server doesn't understand
# the v2 invite API. That's ok provided the room uses old-style event
# IDs.
# If an error is received that is due to an unrecognised endpoint,
# fallback to the v1 endpoint if the room uses old-style event IDs.
# Otherwise consider it a legitmate error and raise.
err = e.to_synapse_error()
if self._is_unknown_endpoint(e, err):
if room_version.event_format != EventFormatVersions.V1:
raise SynapseError(
400,
"User's homeserver does not support this room version",
Codes.UNSUPPORTED_ROOM_VERSION,
)
elif e.code in (403, 429):
raise e.to_synapse_error()
else:
raise
raise err
clokep marked this conversation as resolved.
Show resolved Hide resolved

# Didn't work, try v1 API.
# Note the v1 API returns a tuple of `(200, content)`
Expand All @@ -865,9 +873,8 @@ async def send_leave(self, destinations: Iterable[str], pdu: EventBase) -> None:
pdu: event to be sent

Raises:
SynapseError if the chosen remote server returns a 300/400 code.

RuntimeError if no servers were reachable.
SynapseError: if the chosen remote server returns a 300/400 code, or
no servers successfully handle the request.
"""

async def send_request(destination: str) -> None:
Expand All @@ -889,16 +896,11 @@ async def _do_send_leave(self, destination: str, pdu: EventBase) -> JsonDict:
content=pdu.get_pdu_json(time_now),
)
except HttpResponseException as e:
if e.code in [400, 404]:
err = e.to_synapse_error()

# If we receive an error response that isn't a generic error, or an
# unrecognised endpoint error, we assume that the remote understands
# the v2 invite API and this is a legitimate error.
if err.errcode not in [Codes.UNKNOWN, Codes.UNRECOGNIZED]:
raise err
else:
raise e.to_synapse_error()
# If an error is received that is due to an unrecognised endpoint,
# fallback to the v1 endpoint. Otherwise consider it a legitmate error
# and raise.
if not self._is_unknown_endpoint(e):
raise

logger.debug("Couldn't send_leave with the v2 API, falling back to the v1 API")

Expand Down