diff --git a/threatingestor/sources/rss.py b/threatingestor/sources/rss.py index 029f435..cbe6d44 100644 --- a/threatingestor/sources/rss.py +++ b/threatingestor/sources/rss.py @@ -54,16 +54,17 @@ def run(self, saved_state): rss_exclude = re.sub(re.compile(fr"{self.exclude}", re.IGNORECASE), "", str(item.get('link'))) if rss_exclude: - if self.feed_type == "afterioc": - text = soup.get_text(separator=' ').split('Indicators of Compromise')[-1] - artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) - elif self.feed_type == "clean": - text = soup.get_text(separator=' ') - artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) - else: - # Default: self.feed_type == 'messy'. - text = soup.get_text(separator=' ') - artifacts += self.process_element(text, item.get('link')) + if "http" in rss_exclude: + if self.feed_type == "afterioc": + text = soup.get_text(separator=' ').split('Indicators of Compromise')[-1] + artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) + elif self.feed_type == "clean": + text = soup.get_text(separator=' ') + artifacts += self.process_element(text, item.get('link'), include_nonobfuscated=True) + else: + # Default: self.feed_type == 'messy'. + text = soup.get_text(separator=' ') + artifacts += self.process_element(text, item.get('link')) if self.include is not None: rss_include = re.compile(r"{0}".format(self.include)).findall(str(self.include.split('|')))