From 28199e93579b5a73841a95ed4d355322227432b5 Mon Sep 17 00:00:00 2001 From: David Teller Date: Mon, 23 May 2022 19:27:39 +0200 Subject: [PATCH] Uniformize spam-checker API, part 2: check_event_for_spam (#12808) Signed-off-by: David Teller --- changelog.d/12808.feature | 1 + docs/modules/spam_checker_callbacks.md | 27 ++++++++------ docs/upgrade.md | 29 +++++++++++++++ synapse/api/errors.py | 4 +-- synapse/events/spamcheck.py | 49 ++++++++++++++++++++------ synapse/federation/federation_base.py | 5 +-- synapse/handlers/message.py | 11 +++--- synapse/module_api/__init__.py | 5 +++ synapse/module_api/errors.py | 2 ++ synapse/spam_checker_api/__init__.py | 27 +++++++++++++- 10 files changed, 129 insertions(+), 31 deletions(-) create mode 100644 changelog.d/12808.feature diff --git a/changelog.d/12808.feature b/changelog.d/12808.feature new file mode 100644 index 000000000000..561c8b9d34a4 --- /dev/null +++ b/changelog.d/12808.feature @@ -0,0 +1 @@ +Update to `check_event_for_spam`. Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). \ No newline at end of file diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 27c5a0ed5cfe..71f6f9f0ab45 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -11,22 +11,29 @@ The available spam checker callbacks are: ### `check_event_for_spam` _First introduced in Synapse v1.37.0_ +_Signature extended to support Allow and Code in Synapse v1.60.0_ +_Boolean and string return value types deprecated in Synapse v1.60.0_ ```python -async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] +async def check_event_for_spam(event: "synapse.module_api.EventBase") -> Union["synapse.module_api.ALLOW", "synapse.module_api.error.Codes", str, bool] ``` -Called when receiving an event from a client or via federation. The callback must return -either: -- an error message string, to indicate the event must be rejected because of spam and - give a rejection reason to forward to clients; -- the boolean `True`, to indicate that the event is spammy, but not provide further details; or -- the booelan `False`, to indicate that the event is not considered spammy. +Called when receiving an event from a client or via federation. The callback must return either: + - `synapse.module_api.ALLOW`, to allow the operation. Other callbacks + may still decide to reject it. + - `synapse.api.Codes` to reject the operation with an error code. In case + of doubt, `synapse.api.error.Codes.FORBIDDEN` is a good error code. + - (deprecated) a `str` to reject the operation and specify an error message. Note that clients + typically will not localize the error message to the user's preferred locale. + - (deprecated) on `False`, behave as `ALLOW`. Deprecated as confusing, as some + callbacks in expect `True` to allow and others `True` to reject. + - (deprecated) on `True`, behave as `synapse.api.error.Codes.FORBIDDEN`. Deprecated as confusing, as + some callbacks in expect `True` to allow and others `True` to reject. If multiple modules implement this callback, they will be considered in order. If a -callback returns `False`, Synapse falls through to the next one. The value of the first -callback that does not return `False` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.ALLOW`, Synapse falls through to the next one. The value of the +first callback that does not return `synapse.module_api.ALLOW` will be used. If this happens, Synapse +will not call any of the subsequent implementations of this callback. ### `user_may_join_room` diff --git a/docs/upgrade.md b/docs/upgrade.md index 92ca31b2f8de..e7eadadb64bf 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -177,7 +177,36 @@ has queries that can be used to check a database for this problem in advance. +## SpamChecker API's `check_event_for_spam` has a new signature. +The previous signature has been deprecated. + +Whereas `check_event_for_spam` callbacks used to return `Union[str, bool]`, they should now return `Union["synapse.module_api.Allow", "synapse.module_api.errors.Codes"]`. + +This is part of an ongoing refactoring of the SpamChecker API to make it less ambiguous and more powerful. + +If your module implements `check_event_for_spam` as follows: + +```python +async def check_event_for_spam(event): + if ...: + # Event is spam + return True + # Event is not spam + return False +``` + +you should rewrite it as follows: + +```python +async def check_event_for_spam(event): + if ...: + # Event is spam, mark it as forbidden (you may use some more precise error + # code if it is useful). + return synapse.module_api.errors.Codes.FORBIDDEN + # Event is not spam, mark it as `ALLOW`. + return synapse.module_api.ALLOW +``` # Upgrading to v1.59.0 diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 9614be6b4e46..6650e826d5af 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -270,9 +270,7 @@ class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" def __init__( - self, - msg: str = "Unrecognized request", - errcode: str = Codes.UNRECOGNIZED, + self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED ): super().__init__(400, msg, errcode) diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 61bcbe2abe60..7984874e21df 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -27,9 +27,10 @@ Union, ) +from synapse.api.errors import Codes from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper -from synapse.spam_checker_api import RegistrationBehaviour +from synapse.spam_checker_api import Allow, Decision, RegistrationBehaviour from synapse.types import RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.metrics import Measure @@ -40,9 +41,19 @@ logger = logging.getLogger(__name__) + CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ ["synapse.events.EventBase"], - Awaitable[Union[bool, str]], + Awaitable[ + Union[ + Allow, + Codes, + # Deprecated + bool, + # Deprecated + str, + ] + ], ] SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[ ["synapse.events.EventBase"], @@ -259,7 +270,7 @@ def register_callbacks( async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[bool, str]: + ) -> Union[Decision, str]: """Checks if a given event is considered "spammy" by this server. If the server considers an event spammy, then it will be rejected if @@ -270,18 +281,36 @@ async def check_event_for_spam( event: the event to be checked Returns: - True or a string if the event is spammy. If a string is returned it - will be used as the error message returned to the user. + - on `ALLOW`, the event is considered good (non-spammy) and should + be let through. Other spamcheck filters may still reject it. + - on `Code`, the event is considered spammy and is rejected with a specific + error message/code. + - on `str`, the event is considered spammy and the string is used as error + message. This usage is generally discouraged as it doesn't support + internationalization. """ for callback in self._check_event_for_spam_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - res: Union[bool, str] = await delay_cancellation(callback(event)) - if res: - return res - - return False + res: Union[Decision, str, bool] = await delay_cancellation( + callback(event) + ) + if res is False or res is Allow.ALLOW: + # This spam-checker accepts the event. + # Other spam-checkers may reject it, though. + continue + elif res is True: + # This spam-checker rejects the event with deprecated + # return value `True` + return Codes.FORBIDDEN + else: + # This spam-checker rejects the event either with a `str` + # or with a `Codes`. In either case, we stop here. + return res + + # No spam-checker has rejected the event, let it pass. + return Allow.ALLOW async def should_drop_federated_event( self, event: "synapse.events.EventBase" diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 41ac49fdc8bf..1e866b19d87b 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING +import synapse from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import EventFormatVersions, RoomVersion @@ -98,9 +99,9 @@ async def _check_sigs_and_hash( ) return redacted_event - result = await self.spam_checker.check_event_for_spam(pdu) + spam_check = await self.spam_checker.check_event_for_spam(pdu) - if result: + if spam_check is not synapse.spam_checker_api.Allow.ALLOW: logger.warning("Event contains spam, soft-failing %s", pdu.event_id) # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e566ff1f8ed8..cb1bc4c06f1c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -23,6 +23,7 @@ from twisted.internet.interfaces import IDelayedCall +import synapse from synapse import event_auth from synapse.api.constants import ( EventContentFields, @@ -885,11 +886,11 @@ async def create_and_send_nonmember_event( event.sender, ) - spam_error = await self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, str): - spam_error = "Spam is not permitted here" - raise SynapseError(403, spam_error, Codes.FORBIDDEN) + spam_check = await self.spam_checker.check_event_for_spam(event) + if spam_check is not synapse.spam_checker_api.Allow.ALLOW: + raise SynapseError( + 403, "This message had been rejected as probable spam", spam_check + ) ev = await self.handle_new_client_event( requester=requester, diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index c4f661bb9382..95f3b2792793 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -35,6 +35,7 @@ from twisted.internet import defer from twisted.web.resource import Resource +from synapse import spam_checker_api from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.events.presence_router import ( @@ -140,6 +141,9 @@ PRESENCE_ALL_USERS = PresenceRouter.ALL_USERS +ALLOW = spam_checker_api.Allow.ALLOW +# Singleton value used to mark a message as permitted. + __all__ = [ "errors", "make_deferred_yieldable", @@ -147,6 +151,7 @@ "respond_with_html", "run_in_background", "cached", + "Allow", "UserID", "DatabasePool", "LoggingTransaction", diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py index e58e0e60feab..bedd045d6fe1 100644 --- a/synapse/module_api/errors.py +++ b/synapse/module_api/errors.py @@ -15,6 +15,7 @@ """Exception types which are exposed as part of the stable module API""" from synapse.api.errors import ( + Codes, InvalidClientCredentialsError, RedirectException, SynapseError, @@ -24,6 +25,7 @@ from synapse.storage.push_rule import RuleNotFoundException __all__ = [ + "Codes", "InvalidClientCredentialsError", "RedirectException", "SynapseError", diff --git a/synapse/spam_checker_api/__init__.py b/synapse/spam_checker_api/__init__.py index 73018f2d002e..95132c80b70e 100644 --- a/synapse/spam_checker_api/__init__.py +++ b/synapse/spam_checker_api/__init__.py @@ -12,13 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. from enum import Enum +from typing import Union + +from synapse.api.errors import Codes class RegistrationBehaviour(Enum): """ - Enum to define whether a registration request should allowed, denied, or shadow-banned. + Enum to define whether a registration request should be allowed, denied, or shadow-banned. """ ALLOW = "allow" SHADOW_BAN = "shadow_ban" DENY = "deny" + + +# We define the following singleton enum rather than a string to be able to +# write `Union[Allow, ..., str]` in some of the callbacks for the spam-checker +# API, where the `str` is required to maintain backwards compatibility with +# previous versions of the API. +class Allow(Enum): + """ + Singleton to allow events to pass through in SpamChecker APIs. + """ + + ALLOW = "allow" + + +Decision = Union[Allow, Codes] +""" +Union to define whether a request should be allowed or rejected. + +To accept a request, return `ALLOW`. + +To reject a request without any specific information, use `Codes.FORBIDDEN`. +"""