diff --git a/src/tribler/core/sentry_reporter/sentry_scrubber.py b/src/tribler/core/sentry_reporter/sentry_scrubber.py index a3efa7dd404..873adf9f404 100644 --- a/src/tribler/core/sentry_reporter/sentry_scrubber.py +++ b/src/tribler/core/sentry_reporter/sentry_scrubber.py @@ -172,17 +172,22 @@ def scrub_entity_recursively(self, entity: Union[str, Dict, List, Any], depth=10 if isinstance(entity, dict): result = {} for key, value in entity.items(): - if key in self.dict_keys_for_scrub: + if key in self.dict_keys_for_scrub and isinstance(value, str): value = value.strip() fake_value = obfuscate_string(value) placeholder = self.create_placeholder(fake_value) self.add_sensitive_pair(value, placeholder) - result[key] = self.scrub_entity_recursively(value, depth) + result[key] = placeholder + else: + result[key] = self.scrub_entity_recursively(value, depth) return result return entity def add_sensitive_pair(self, text, placeholder): + if not (text and text.strip()): # We should not replace empty substrings in the middle of other strings + return + if text in self.sensitive_occurrences: return diff --git a/src/tribler/core/sentry_reporter/sentry_tools.py b/src/tribler/core/sentry_reporter/sentry_tools.py index 1cbdf8ebe52..8c39ccf4e7d 100644 --- a/src/tribler/core/sentry_reporter/sentry_tools.py +++ b/src/tribler/core/sentry_reporter/sentry_tools.py @@ -200,9 +200,6 @@ def obfuscate_string(s: str, part_of_speech: str = 'noun') -> str: The same random words will be generated for the same given strings. """ - if not s: - return s - faker = Faker(locale='en_US') faker.seed_instance(s) return faker.word(part_of_speech=part_of_speech) diff --git a/src/tribler/core/sentry_reporter/tests/test_sentry_scrubber.py b/src/tribler/core/sentry_reporter/tests/test_sentry_scrubber.py index 6e37651a914..e1bd6f3752f 100644 --- a/src/tribler/core/sentry_reporter/tests/test_sentry_scrubber.py +++ b/src/tribler/core/sentry_reporter/tests/test_sentry_scrubber.py @@ -213,16 +213,16 @@ def test_scrub_event(scrubber): } assert scrubber.scrub_event(event) == { 'the very first item': '', - 'server_name': '', + 'server_name': '', CONTEXTS: { REPORTER: { 'any': { - 'USERNAME': '', + 'USERNAME': '', 'USERDOMAIN_ROAMINGPROFILE': '', 'PATH': '/users//apps', 'TMP_WIN': 'C:\\Users\\\\AppData\\Local\\Temp', - 'USERDOMAIN': '', - 'COMPUTERNAME': '', + 'USERDOMAIN': '', + 'COMPUTERNAME': '', }, STACKTRACE: [ 'Traceback (most recent call last):', @@ -301,15 +301,20 @@ def test_scrub_dict(scrubber): assert scrubber.scrub_entity_recursively(None) is None assert scrubber.scrub_entity_recursively({}) == {} - given = {'PATH': '/home/username/some/', 'USERDOMAIN': 'UD', 'USERNAME': 'U', 'REPEATED': 'user username UD U'} + assert scrubber.scrub_entity_recursively({'key': [1]}) == {'key': [1]} # non-string values should not lead to error + + given = {'PATH': '/home/username/some/', 'USERDOMAIN': 'UD', 'USERNAME': 'U', 'REPEATED': 'user username UD U', + 'key': ''} assert scrubber.scrub_entity_recursively(given) == {'PATH': '/home//some/', 'REPEATED': 'user ', 'USERDOMAIN': '', - 'USERNAME': ''} + 'USERNAME': '', + 'key': ''} - assert 'username' in scrubber.sensitive_occurrences.keys() - assert 'UD' in scrubber.sensitive_occurrences.keys() - assert 'U' in scrubber.sensitive_occurrences.keys() + assert 'username' in scrubber.sensitive_occurrences + assert 'UD' in scrubber.sensitive_occurrences + assert 'U' in scrubber.sensitive_occurrences + assert '' not in scrubber.sensitive_occurrences def test_scrub_list(scrubber): diff --git a/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py b/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py index 4812a157ddc..037821cadb7 100644 --- a/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py +++ b/src/tribler/core/sentry_reporter/tests/test_sentry_tools.py @@ -144,8 +144,7 @@ def test_extract_dict(): OBFUSCATED_STRINGS = [ - (None, None), - ('', ''), + ('', 'dress'), ('any', 'challenge'), ('string', 'quality'), ]