Skip to content

Commit

Permalink
Detecting bad files during upload/download, rather than while we're s…
Browse files Browse the repository at this point in the history
…ending messages.

This should be much more responsive for users.
  • Loading branch information
Yoric committed Feb 4, 2021
1 parent 79908cb commit f3c9aea
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 60 deletions.
50 changes: 16 additions & 34 deletions synapse_spamcheck_badlist/bad_list_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,43 +153,25 @@ async def check_event_for_spam(self, event) -> bool:
logger.info("Rejected bad link")
return True

# If it's a file, download content, extract hash.
with self._md5_check_performance.time():
if content.get("msgtype", "") in ["m.file", "m.image", "m.audio"]:
if not await self.can_we_check_md5():
return False

match = self._mxc_re.match(content.get("url", ""))
if match != None:
server_name = match.group('server_name')
media_id = match.group('media_id')
response = None
try:
url = "%s/_matrix/media/r0/download/%s/%s" % (
self._base_url,
urlquote(server_name),
urlquote(media_id)
)
response = await self._api.http_client.request("GET", url)
except Exception as e:
# In case of timeout or error, there's nothing we can do.
# Let's not take the risk of blocking valid contents.
logger.warn("Could not download media: '%s', assuming it's not spam." % e)
return False
if response.code == 429:
logger.warn("We were rate-limited, assuming it's not spam.")
return False

md5 = hashlib.md5()
await response.collect(lambda batch: md5.update(batch))
is_bad_upload = await self._api.run_db_interaction("Check upload against evil db", _db_is_bad_upload, self._md5_table, md5.hexdigest())
if is_bad_upload:
logger.info("Rejected bad upload")
return True

# Not spam
return False

async def check_media_file_for_spam(self, file_wrapper, file_info):
    """Decide whether a media file should be rejected, based on its MD5.

    The file content is streamed through an MD5 hasher via
    ``file_wrapper.write_chunks_to`` and the resulting hex digest is handed
    to the ``_db_is_bad_upload`` database interaction together with
    ``self._md5_table``.

    ``file_info`` is accepted but not used by this check.

    Returns ``True`` (reject) when the database interaction reports a
    match, ``False`` otherwise -- including when ``can_we_check_md5()``
    says the check cannot be performed.
    """
    # Guard: without MD5 checking available, let the media through.
    if not await self.can_we_check_md5():
        return False

    logger.info("Checking media file")

    # Feed every chunk of the file to the hasher.
    md5_hasher = hashlib.md5()
    await file_wrapper.write_chunks_to(md5_hasher.update)
    digest = md5_hasher.hexdigest()

    # Look the digest up in the database.
    found_in_db = await self._api.run_db_interaction(
        "Check whether this md5 shows up in the database",
        _db_is_bad_upload,
        self._md5_table,
        digest,
    )
    if not found_in_db:
        return False

    logger.info("Rejected bad media file")
    return True

def check_username_for_spam(self, user_profile):
    """Never flag a username as spam.

    ``user_profile`` is ignored; every username is allowed, so the result
    is always ``False``.
    """
    return False

Expand Down
32 changes: 7 additions & 25 deletions test/4_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import requests

logging.basicConfig(filename = "/data/test.log")
logger = logging.getLogger("Test")
logger = logging.getLogger("synapse_spamcheck_badlist.test")


class Test:
def __init__(self):
Expand Down Expand Up @@ -80,7 +81,7 @@ def _upload_content(self, prefix, content):
"""
Upload a file.
Argument `prefix` is prepended to the file name, to aid with lookup up
Argument `prefix` is prepended to the file name, to aid with looking up
stuff in the Synapse logs.
"""
response = requests.post('http://localhost:8080/_matrix/media/r0/upload?filename=%s-%s' % (prefix, uuid.uuid1()),
Expand All @@ -90,7 +91,7 @@ def _upload_content(self, prefix, content):
},
data = content
).json()
return response['content_uri']
return response.get('content_uri', None)

def _sync_with_server(self, since):
"""
Expand Down Expand Up @@ -182,9 +183,10 @@ def test(self):
good_mxid = self._upload_content('good', good_file_content)
logger.info('Good image is %s' % good_mxid)

logger.info('Upload a bad image, for the time being, it should be accepted')
logger.info('Upload a bad image, it should be rejected')
evil_mxid = self._upload_content('evil', evil_file_content)
logger.info('Bad image is %s' % evil_mxid)
assert evil_mxid is None


for message_type in ['m.file', 'm.image', 'm.audio']:
logger.info('Send good image with good description, it should be accepted')
Expand Down Expand Up @@ -223,26 +225,6 @@ def test(self):
# Message may be redacted later
bad_events[event_id] = "Good image with bad description, type %s" % message_type

logger.info('Send bad image with good description, it should be rejected')
event_id = self._send_message_to_room(
'bad-image-with-good-description',
{
'body': 'A text without any link',
'msgtype': message_type,
'url': evil_mxid,
'info': {
'w': 320,
'h': 200,
'size': len(evil_file_content),
}
}
)
if event_id is None:
logger.info('Message was rejected immediately')
else:
# Message may be redacted later
bad_events[event_id] = "Good image with bad description, type %s" % message_type

logger.info('Sending canary event, to ensure that all previous events have been flushed')
event_id = self._send_message_to_room(
'canary-event',
Expand Down
3 changes: 3 additions & 0 deletions test/before_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@

\rm -Rf synapse
git clone https://github.com/matrix-org/synapse.git
cd synapse
git checkout erikj/media_spam_checker
cd ..
docker build -t matrixdotorg/synapse -f synapse/docker/Dockerfile synapse
2 changes: 1 addition & 1 deletion test/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ register_new_matrix_user -c /data/homeserver.yaml -u user_2 -p user_2 --no-admin

# 4. Running test
echo TESTER: Running test
python /data/test/4_test.py
python /data/test/4_test.py &> /data/test.log
RESULT=$?

# 5. In case of failure, display logs
Expand Down

0 comments on commit f3c9aea

Please sign in to comment.