diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml new file mode 100644 index 0000000..6f2a0a1 --- /dev/null +++ b/.github/workflows/workflow.yml @@ -0,0 +1,23 @@ +name: threatingestor-workflow + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.6", "3.7"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-testing.txt + - name: Test scripts + run: nosetests --with-coverage --cover-package=threatingestor --cover-xml \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..61a3d5d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +# Ubuntu 18.04 provides Python 3.6; it is pulled in below as a dependency of python3-pip +FROM ubuntu:18.04 + +RUN apt-get update +RUN apt-get install -y python3-pip +RUN apt-get install -y sqlite3 + +RUN pip3 install threatingestor \ + twitter \ + feedparser +COPY config.yml . + +# Run ThreatIngestor directly when the container starts (no interactive shell needed) +CMD ["threatingestor", "config.yml"] \ No newline at end of file diff --git a/README.rst b/README.rst index 041b1dc..5b6bd24 100644 --- a/README.rst +++ b/README.rst @@ -6,16 +6,22 @@ ThreatIngestor :alt: Developed by InQuest .. image:: https://travis-ci.org/InQuest/ThreatIngestor.svg?branch=master :target: https://travis-ci.org/InQuest/ThreatIngestor - :alt: Build Status + :alt: Build Status (Travis CI) + +.. Change ?branch=develop to ?branch=master when merging into master +.. image:: https://github.com/InQuest/ThreatIngestor/workflows/threatingestor-workflow/badge.svg?branch=develop + :target: https://github.com/InQuest/ThreatIngestor/actions + :alt: Build Status (GitHub Workflow) + .. 
image:: https://readthedocs.org/projects/threatingestor/badge/?version=latest :target: http://inquest.readthedocs.io/projects/threatingestor/en/latest/?badge=latest :alt: Documentation Status -.. image:: https://api.codacy.com/project/badge/Grade/a989bb12e9604d5a9577ce71848e7a2a - :target: https://app.codacy.com/app/InQuest/ThreatIngestor - :alt: Code Health -.. image:: https://api.codacy.com/project/badge/Coverage/a989bb12e9604d5a9577ce71848e7a2a - :target: https://app.codacy.com/app/InQuest/ThreatIngestor - :alt: Test Coverage +.. .. image:: https://api.codacy.com/project/badge/Grade/a989bb12e9604d5a9577ce71848e7a2a +.. :target: https://app.codacy.com/app/InQuest/ThreatIngestor +.. :alt: Code Health +.. .. image:: https://api.codacy.com/project/badge/Coverage/a989bb12e9604d5a9577ce71848e7a2a +.. :target: https://app.codacy.com/app/InQuest/ThreatIngestor +.. :alt: Test Coverage .. image:: http://img.shields.io/pypi/v/ThreatIngestor.svg :target: https://pypi.python.org/pypi/ThreatIngestor :alt: PyPi Version @@ -74,6 +80,7 @@ Sources * `Beanstalk work queues `__ * `Git repositories `__ * `GitHub repository search `__ +* `Gists by username `__ * `RSS feeds `__ * `Amazon SQS queues `__ * `Twitter `__ @@ -127,3 +134,20 @@ Issues and pull requests are welcomed. Please keep Python code PEP8 compliant. B .. _ThreatIngestor walkthroughs: https://inquest.net/taxonomy/term/42 .. _RSS config file: https://github.com/InQuest/ThreatIngestor/blob/master/rss.example.yml .. _labs.inquest.net/iocdb: https://labs.inquest.net/iocdb + +Docker Container +---------------- + +A Dockerfile is now available for running ThreatIngestor within a Docker container. + +First, you'll need to build the image:: + + docker build . 
-t threat + +After that, start a container from the image with the host filesystem bind-mounted at ``/dock``:: + + docker run -it --mount type=bind,source=/,target=/dock threat /bin/bash + +Once the container is running and you're inside its ``/bin/bash`` shell, you can run ThreatIngestor as usual:: + + threatingestor config.yml \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index ae9a149..c56404f 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -167,6 +167,15 @@ github :show-inheritance: :member-order: bysource +github_gist +^^^^^^^^^^^ + +.. automodule:: threatingestor.sources.github_gist + :members: + :undoc-members: + :show-inheritance: + :member-order: bysource + rss ^^^ diff --git a/docs/sources/github.rst b/docs/sources/github.rst index 8e8f8d9..87c8457 100644 --- a/docs/sources/github.rst +++ b/docs/sources/github.rst @@ -10,9 +10,10 @@ Configuration Options ~~~~~~~~~~~~~~~~~~~~~ * ``module`` (required): ``github`` -* ``search`` (required): search term(s). -* ``username``: Optional username for authentication. -* ``token``: Optional token or password for authentication. +* ``search`` (required): Search term(s). +* ``username`` (optional): Username for authentication. +* ``token`` (optional): Token or password for authentication. +* ``num_of_days`` (optional): On the first run (when there is no saved state), only return repositories created within this many days. Defaults to ``10``. Example Configuration ~~~~~~~~~~~~~~~~~~~~~ @@ -41,5 +42,6 @@ Inside the ``sources`` section of your configuration file: credentials: github-auth module: github search: CVE-2018- + num_of_days: 60 .. _repository search API: https://developer.github.com/v3/search/#search-repositories diff --git a/docs/sources/github_gist.rst b/docs/sources/github_gist.rst new file mode 100644 index 0000000..20a3f14 --- /dev/null +++ b/docs/sources/github_gist.rst @@ -0,0 +1,44 @@ +.. 
_github-gist-source: + +GitHub Gist Username Search +--------------------------- + +The **GitHub Gist** source plugin uses GitHub's `github gist user API`_ to find new gists created by a user, and creates a :ref:`Task artifact ` for each. + +Configuration Options +~~~~~~~~~~~~~~~~~~~~~ + +* ``module`` (required): ``github_gist`` +* ``user`` (required): Username of the gist owner. +* ``username`` (optional): Username for authentication. +* ``token`` (optional): Token or password for authentication. + +Example Configuration +~~~~~~~~~~~~~~~~~~~~~ + +The following example assumes GitHub credentials have already been +configured in the ``credentials`` section of the config, like this: + +.. code-block:: yaml + + credentials: + - name: github-auth + username: myuser + token: MYTOKEN + +.. note:: + + GitHub credentials are optional, but increase the rate limit for API + requests *significantly*. If you are doing more than one or two low- + volume searches, you should set up the credentials. + +Inside the ``sources`` section of your configuration file: + +.. code-block:: yaml + + - name: github-gist-search + credentials: github-auth + module: github_gist + user: InQuest + +.. 
_github gist user API: https://docs.github.com/en/rest/gists/gists#list-gists-for-a-user diff --git a/docs/workflows.rst b/docs/workflows.rst index c890c8b..386df25 100644 --- a/docs/workflows.rst +++ b/docs/workflows.rst @@ -394,6 +394,11 @@ And the ThreatIngestor config file: credentials: github-auth search: CVE-2018- + - name: github-gist-search + module: github_gist + credentials: github-auth + user: InQuest + - name: git-yara-rules module: git url: https://github.com/InQuest/yara-rules.git diff --git a/tests/test_sources_github.py b/tests/test_sources_github.py index 970f713..cfe8623 100644 --- a/tests/test_sources_github.py +++ b/tests/test_sources_github.py @@ -6,7 +6,6 @@ import threatingestor.sources.github - API_RESPONSE_DATA = """ { "total_count": 40, @@ -57,7 +56,7 @@ def setUp(self): @patch('threatingestor.sources.github.datetime') @responses.activate def test_run_returns_saved_state_tasks(self, mock_datetime): - responses.add(responses.GET, threatingestor.sources.github.SEARCH_URL, + responses.add(responses.GET, threatingestor.sources.github.REPO_SEARCH_URL, body=API_RESPONSE_DATA) mock_datetime.datetime.utcnow.return_value = datetime.datetime(2018, 4, 30, 17, 5, 13, 194840) mock_datetime.datetime.side_effect = lambda *args, **kw: datetime.datetime(*args, **kw) diff --git a/threatingestor/sources/github.py b/threatingestor/sources/github.py index 5af0e49..77f0544 100644 --- a/threatingestor/sources/github.py +++ b/threatingestor/sources/github.py @@ -1,34 +1,30 @@ -import datetime - - -import requests - +import datetime, requests from threatingestor.sources import Source import threatingestor.artifacts - -SEARCH_URL = "https://api.github.com/search/repositories" - +REPO_SEARCH_URL = "https://api.github.com/search/repositories" class Plugin(Source): """Github Source Plugin""" - def __init__(self, name, search, username="", token=""): + def __init__(self, name, search, num_of_days=10, username="", token=""): self.name = name self.search = search + 
self.num_of_days = num_of_days if username and token: self.auth = (username, token) else: self.auth = None - def _repository_search(self, params): """Returns a list of repository results.""" + # Iterates through pages of results from query. - response = requests.get(SEARCH_URL, params=params, auth=self.auth) + response = requests.get(REPO_SEARCH_URL, params=params, auth=self.auth) repo_list = [] + while True: for repo in response.json().get('items', []): repo_list.append(repo) @@ -37,35 +33,33 @@ def _repository_search(self, params): break response = requests.get( - response.links.get('next')["url"], - auth=self.auth) + response.links.get('next')["url"], auth=self.auth) return repo_list - def run(self, saved_state): """Returns a list of artifacts and the saved state""" # If no saved_state, search max 1 day ago. if not saved_state: - saved_state = (datetime.datetime.utcnow() - - datetime.timedelta(days=10)).isoformat()[:-7] + 'Z' + saved_state = (datetime.datetime.utcnow() - datetime.timedelta(days=self.num_of_days)).isoformat()[:-7] + 'Z' params = { 'q': "{search} created:>={timestamp}".format( search=self.search, timestamp=saved_state), - "per_page": "100"} + "per_page": "100" + } saved_state = datetime.datetime.utcnow().isoformat()[:-7] + 'Z' repo_list = self._repository_search(params) artifact_list = [] + for repo in repo_list: title = "Manual Task: GitHub {u}".format(u=repo['full_name']) description = 'URL: {u}\nTask autogenerated by ThreatIngestor from source: {s}' description = description.format(s=self.name, u=repo['html_url']) - artifact = threatingestor.artifacts.Task( - title, self.name, reference_link=repo['html_url'], reference_text=description) + artifact = threatingestor.artifacts.Task(title, self.name, reference_link=repo['html_url'], reference_text=description) artifact_list.append(artifact) diff --git a/threatingestor/sources/github_gist.py b/threatingestor/sources/github_gist.py new file mode 100644 index 0000000..516a70b --- /dev/null +++ 
import datetime

import requests

from threatingestor.sources import Source
import threatingestor.artifacts

# Endpoint listing the gists that belong to one GitHub user.
GIST_URL_TEMPLATE = "https://api.github.com/users/{0}/gists"

# Second-resolution UTC timestamp, e.g. ``2018-04-30T17:05:13Z``.
# Formatting explicitly (instead of slicing ``isoformat()``) stays correct
# when the microsecond field is zero and ``isoformat()`` omits it.
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def user_set(user):
    """Return the GitHub API URL that lists the gists of ``user``."""
    return GIST_URL_TEMPLATE.format(user)


class Plugin(Source):
    """Github Gist Source Plugin"""

    def __init__(self, name, user="", username="", token=""):
        """Store the source name, the gist owner and optional credentials.

        :param name: Name of this source, used in generated artifacts.
        :param user: GitHub username whose gists are listed.
        :param username: Optional username for API authentication.
        :param token: Optional token or password for API authentication.
        """
        self.name = name
        self.user = user

        # Authenticate only when both halves of the credential pair are set.
        if username and token:
            self.auth = (username, token)
        else:
            self.auth = None

    def _gist_search(self, params):
        """Returns a list of gist results, following pagination links.

        :param params: Query parameters sent with the first request.
        """
        response = requests.get(user_set(self.user), params=params, auth=self.auth)

        gist_list = []

        while True:
            payload = response.json()

            # A successful response is a JSON array. Anything else (for
            # example an error object) carries no gists, so stop rather than
            # iterate over its keys.
            if not isinstance(payload, list):
                break

            gist_list.extend(payload)

            next_page = response.links.get('next')
            if not next_page:
                break

            # The "next" URL already embeds the query string.
            response = requests.get(next_page["url"], auth=self.auth)

        return gist_list

    def run(self, saved_state):
        """Returns a list of artifacts and the saved state

        :param saved_state: Timestamp string returned by the previous run, or
            a falsy value on the first run.
        :returns: ``(saved_state, artifact_list)`` where ``saved_state`` is
            the timestamp taken just before this run's request.
        """
        params = {"per_page": "100"}

        # Ask only for gists updated since the previous run, so the same
        # gists are not turned into tasks again on every run.
        if saved_state:
            params["since"] = saved_state

        # Taken before the request so nothing published while the request is
        # in flight falls between two runs.
        saved_state = datetime.datetime.utcnow().strftime(TIMESTAMP_FORMAT)
        gist_list = self._gist_search(params)

        artifact_list = []

        for gist in gist_list:
            title = "Gist Owner: {0}".format(self.user)
            description = 'URL: {u}\nTask autogenerated by ThreatIngestor from source: {s}'
            description = description.format(s=self.name, u=gist['html_url'])
            artifact = threatingestor.artifacts.Task(
                title,
                self.name,
                reference_link=gist['html_url'],
                reference_text=description,
            )

            artifact_list.append(artifact)

        return saved_state, artifact_list
tweet_list] + tweets = [] + for tweet in tweet_list: + if "retweeted_status" in tweet: + content = tweet['retweeted_status'].get('full_text', '') + else: + content = tweet.get('full_text', '') + + tweets.append({ + 'content': content, + 'id': tweet.get('id_str', ''), + 'user': tweet.get('user', {}).get('screen_name', ''), + 'entities': tweet.get('entities', {}), + }) artifacts = [] # Traverse in reverse, old to new.