From 1ed237eb51d81e953182db34e08a6a6c489ae214 Mon Sep 17 00:00:00 2001 From: azazelm3dj3d <56496067+azazelm3dj3d@users.noreply.github.com> Date: Wed, 1 Nov 2023 11:45:38 -0500 Subject: [PATCH] Quick RSS fix --- threatingestor/sources/rss.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/threatingestor/sources/rss.py b/threatingestor/sources/rss.py index 029f435..cbe6d44 100644 --- a/threatingestor/sources/rss.py +++ b/threatingestor/sources/rss.py @@ -54,16 +54,17 @@ def run(self, saved_state): rss_exclude = re.sub(re.compile(fr"{self.exclude}", re.IGNORECASE), "", str(item.get('link'))) if rss_exclude: - if self.feed_type == "afterioc": - text = soup.get_text(separator=' ').split('Indicators of Compromise')[-1] - artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) - elif self.feed_type == "clean": - text = soup.get_text(separator=' ') - artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) - else: - # Default: self.feed_type == 'messy'. - text = soup.get_text(separator=' ') - artifacts += self.process_element(text, item.get('link')) + if "http" in rss_exclude: + if self.feed_type == "afterioc": + text = soup.get_text(separator=' ').split('Indicators of Compromise')[-1] + artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) + elif self.feed_type == "clean": + text = soup.get_text(separator=' ') + artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) + else: + # Default: self.feed_type == 'messy'. + text = soup.get_text(separator=' ') + artifacts += self.process_element(text, item.get('link')) if self.include is not None: rss_include = re.compile(r"{0}".format(self.include)).findall(str(self.include.split('|')))