Skip to content

Commit

Permalink
Merge pull request #133 from InQuest/dev
Browse files Browse the repository at this point in the history
v1.0.3
  • Loading branch information
Trevor authored Feb 20, 2023
2 parents 85f9cc5 + 5caf299 commit 9c541e5
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 28 deletions.
33 changes: 31 additions & 2 deletions config.example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,38 @@ sources:
url: https://inquest.net/blog/rss
feed_type: messy

- name: inquest-blog
# Sitemap examples

# Searches for "articles" keyword
- name: inquest-sitemap-articles
module: sitemap
url: https://www.inquest.net/sitemap.xml
filter: articles

# Defaults to "blog" keyword
- name: inquest-sitemap-blog
module: sitemap
url: https://www.inquest.net/sitemap.xml

# Searches for the "articles" or "security" keywords
- name: inquest-sitemap-blog-articles-security
module: sitemap
url: https://www.inquest.net/sitemap.xml
filter: articles|security

# Specify a directory with the path option
- name: inquest-sitemap-blog-category
module: sitemap
url: https://www.inquest.net/sitemap.xml
path: /blog/category/

# Specify filtering for paths
# Only returns results under /blog/category/release|solutions
- name: inquest-sitemap-release-solutions
module: sitemap
url: https://inquest.net/sitemap.xml
url: https://www.inquest.net/sitemap.xml
path: /blog/category/
filter: release|solutions

- name: image-extraction
module: image
Expand Down
Binary file added docs/_static/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
27 changes: 22 additions & 5 deletions docs/_templates/links.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ <h3>Other Projects</h3>
<ul>
<li><a href="https://github.com/InQuest/ThreatKB">ThreatKB</a></li>
<li><a href="https://github.com/InQuest/python-iocextract">iocextract</a></li>
<li><a href="https://github.com/InQuest/omnibus">Omnibus</a></li>
<li><a href="https://github.com/InQuest/python-sandboxapi">sandboxapi</a></li>
<li><a href="https://github.com/InQuest/python-inquestlabs">inquestlabs
</a></li>
</ul>

<h3>Useful Links</h3>
Expand All @@ -46,24 +47,40 @@ <h3>Useful Links</h3>
<h3>Stay Informed</h3>

<ul>
<li>
<a href="https://labs.inquest.net/">
<img src="_static/logo.png" width="4%" alt="InQuest logo">
InQuest Labs
</a>
</li>
<li>
<a href="https://inquest.net/newsletter">
<img src="_static/logo.png" width="4%" alt="InQuest logo">
InQuest Newsletter
</a>
</li>
<li>
<a href="http://blog.inquest.net/">
<svg class="icon icon-rss"><use xlink:href="#icon-rss"></use></svg> InQuest Blog
<svg class="icon icon-rss"><use xlink:href="#icon-rss"></use></svg>
InQuest Blog
</a>
</li>
<li>
<a href="https://twitter.com/InQuest">
<svg class="icon icon-twitter"><use xlink:href="#icon-twitter"></use></svg> Twitter
<svg class="icon icon-twitter"><use xlink:href="#icon-twitter"></use></svg>
Twitter
</a>
</li>
<li>
<a href="https://www.linkedin.com/company/inquest.net">
<svg class="icon icon-linkedin"><use xlink:href="#icon-linkedin"></use></svg> LinkedIn
<svg class="icon icon-linkedin"><use xlink:href="#icon-linkedin"></use></svg>
LinkedIn
</a>
</li>
<li>
<a href="https://github.com/InQuest">
<svg class="icon icon-github"><use xlink:href="#icon-github"></use></svg> GitHub
<svg class="icon icon-github"><use xlink:href="#icon-github"></use></svg>
GitHub
</a>
</li>
</ul>
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ github
:member-order: bysource

github_gist
^^^^^^
^^^^^^^^^^^

.. automodule:: threatingestor.sources.github_gist
:members:
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# -- Project information -----------------------------------------------------

project = u'ThreatIngestor'
copyright = u'2019 InQuest, LLC'
copyright = u'2019 - 2023 InQuest, LLC'
author = u'InQuest Labs'

# The short X.Y version
Expand Down
4 changes: 2 additions & 2 deletions docs/sources/github_gist.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
.. _github-gist-source:

GitHub Gist Username Search
------------------------
---------------------------

The **GitHub Gist** source plugin uses GitHub's `gist API`_ to find new gists created by a user, and create a :ref:`Task artifact <task-artifact>` for each.
The **GitHub Gist** source plugin uses GitHub's gist API to find new gists created by a user, and create a :ref:`Task artifact <task-artifact>` for each.

Configuration Options
~~~~~~~~~~~~~~~~~~~~~
Expand Down
1 change: 1 addition & 0 deletions requirements-testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ twitter>=1.17.1
feedparser>=5.2.1
threatkb
pymysql
pyshorteners
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ requests
pytesseract
numpy
opencv-python
pyshorteners
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name='threatingestor',
version='1.0.2',
version='1.0.3',
include_package_data=True,
install_requires=requires,
extras_require={
Expand Down
39 changes: 34 additions & 5 deletions threatingestor/sources/sitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
import urllib.request
from urllib.parse import urlparse
from bs4 import BeautifulSoup
import regex as re

from threatingestor.sources import Source

class Plugin(Source):

def __init__(self, name, url):
def __init__(self, name, url, filter=None, path=None):
self.name = name
self.url = url
self.filter = filter
self.path = path

def run(self, saved_state):
saved_state = datetime.datetime.utcnow().isoformat()[:-7] + "Z"
Expand Down Expand Up @@ -58,9 +61,35 @@ def run(self, saved_state):
"loc": loc
}

# Locates all blog links within the sitemap
if "blog" in row["loc"]:
print(row["loc"])
artifacts += self.process_element(row["loc"], self.url)
if self.filter is not None:
# Regex input via config.yml
xml_query = re.compile(r"{0}".format(self.filter)).findall(str(self.filter.split('|')))

# Iterates over the regex output to locate all provided keywords
for x in xml_query:
# Uses a path instead of a keyword
if self.path is not None:
provided_path = f"{self.path}{x}"

if provided_path in row["loc"]:
artifacts += self.process_element(row["loc"], self.url)

# Only filters using a keyword
if self.path is None:
if x in row["loc"]:
artifacts += self.process_element(row["loc"], self.url)

elif self.filter is None and self.path is not None:
# Filters only by path in XML loc, no set filter
# Default: /path/name/*
provided_path = f"{self.path}"

if provided_path in row["loc"]:
artifacts += self.process_element(row["loc"], self.url)

else:
# Locates all blog links within the sitemap
if "blog" in row["loc"]:
artifacts += self.process_element(row["loc"], self.url)

return saved_state, artifacts
8 changes: 3 additions & 5 deletions threatingestor/sources/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
import twitter
from loguru import logger


from threatingestor.sources import Source

from threatingestor.utils.url_controller import UrlController

TWEET_URL = 'https://twitter.com/{user}/status/{id}'


class Plugin(Source):

def __init__(self, name, api_key, api_secret_key, access_token, access_token_secret, defanged_only=True, **kwargs):
Expand Down Expand Up @@ -85,8 +83,8 @@ def run(self, saved_state):
try:
tweet['content'] = tweet['content'].replace(url['url'], url['expanded_url'])
except KeyError:
# No url/expanded_url, continue without expanding.
pass
# Attempts to expand the URL if not available through Twitter
tweet['content'] = tweet['content'].replace(url['url'], UrlController.expand_url(url['url']))

# Process tweet.
saved_state = tweet['id']
Expand Down
10 changes: 4 additions & 6 deletions threatingestor/sources/twitter_follow_links.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
from __future__ import absolute_import


import re
import requests
import twitter
from loguru import logger
from threatingestor.sources import Source

from threatingestor.sources import Source
from threatingestor.utils.url_controller import UrlController

TWEET_URL = 'https://twitter.com/{user}/status/{id}'

WHITELIST_DOMAINS = r"pastebin\.com"


class Plugin(Source):

def __init__(self, name, api_key, api_secret_key, access_token, access_token_secret, defanged_only=True, **kwargs):
Expand Down Expand Up @@ -97,8 +95,8 @@ def run(self, saved_state):
logger.log('NOTIFY', f"Discovered paste: {location}")

except KeyError:
# No url/expanded_url, continue without expanding.
pass
# Attempts to expand the URL if not available through Twitter
tweet['content'] = tweet['content'].replace(url['url'], UrlController.expand_url(url['url']))

return saved_state, artifacts

Expand Down
26 changes: 26 additions & 0 deletions threatingestor/utils/url_controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
This script is a standalone utility available to all sources. Currently, it is only used to expand Twitter (t.co) shortened URLs, but it can be integrated into other places where URLs are ingested.
"""

from pyshorteners import Shortener, exceptions

s = Shortener()

class UrlController:
    """
    Namespace for URL helpers shared by the source plugins.

    The methods are static: call them on the class itself, e.g.
    ``UrlController.expand_url(url)``. No instance state is used.
    """

    @staticmethod
    def expand_url(url):
        """
        Expand ingested URLs with this method.
        If a URL is unknown or cannot be expanded, you'll get the original URL back.
        @param: url (Example: https://inquest.net)
        @rtype: str
        """
        # Declared as a staticmethod because the function takes no ``self``;
        # without the decorator an instance call would bind the instance
        # to ``url``. Class-level calls behave exactly as before.
        try:
            # Delegates to the module-level pyshorteners ``Shortener`` instance.
            expanded_link = s.tinyurl.expand(url)
        # If unable to expand the URL, this exception is thrown
        except exceptions.ExpandingErrorException:
            return str(url)

        return str(expanded_link)

0 comments on commit 9c541e5

Please sign in to comment.