Skip to content

Commit

Permalink
Completed ioc image extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
battleoverflow committed Jan 24, 2023
1 parent b089a32 commit 126eb85
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 23 deletions.
17 changes: 0 additions & 17 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,6 @@ on:
- master

jobs:
pre-deploy:
runs-on: ubuntu-20.04

steps:
- name: Install dependencies
run: |
pip install setuptools build
pip install -r requirements.txt
pip install -r requirements-testing.txt
- name: Building package
run: python -m build
- name: Publishing package to Test PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/

deploy:
runs-on: ubuntu-20.04

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: ["3.6", "3.7", "3.8", "3.9"]
python-version: ["3.6", "3.7", "3.8"]

steps:
- uses: actions/checkout@v3
Expand Down
12 changes: 8 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ FROM ubuntu:18.04
RUN apt-get update
RUN apt-get install python3-pip -y
RUN apt-get install sqlite3
RUN apt-get install tesseract-ocr -y

RUN pip3 install --upgrade pip
RUN pip3 install threatingestor \
twitter \
feedparser
COPY config.yml .
feedparser \
iocextract \
pytesseract \
numpy \
opencv-python

# Run the ThreatIngestor without accessing /bin/bash container
CMD ["threatingestor", "config.yml"]
COPY config.yml .
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name='threatingestor',
version='1.0.0-beta8',
version='1.0.1',
include_package_data=True,
install_requires=requires,
extras_require={
Expand Down
62 changes: 62 additions & 0 deletions threatingestor/sources/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import cv2
import pytesseract
import iocextract
import requests
import datetime
import os
import numpy as np

from threatingestor.sources import Source
import threatingestor.artifacts

class Plugin(Source):
    """Extract URLs from an image using Tesseract OCR and OpenCV preprocessing.

    ``img`` is either an HTTP(S) URL, in which case the image is downloaded
    once when the plugin is constructed, or a local file path that is read
    on every call to :meth:`run`.
    """

    # Seconds to wait for the remote image. Without a timeout ``requests``
    # may block indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 30

    def __init__(self, name, img=""):
        """Store the source settings and fetch the image if it is remote.

        :param name: Source name, used as the artifact source name.
        :param img: HTTP(S) URL or local path of the image to scan.
        """
        self.name = name
        self.img = img

        # Raw bytes of the downloaded image, or None when ``img`` is a local
        # path. Keeping the download on the instance (rather than in a shared
        # temp file) stops one source from reading another source's image.
        self._remote_bytes = None

        # Only real HTTP(S) URLs are downloaded; a local path that merely
        # contains the text "http" is treated as a path.
        if str(img).lower().startswith(("http://", "https://")):
            response = requests.get(str(img), timeout=self.REQUEST_TIMEOUT)
            self._remote_bytes = response.content

    def run(self, saved_state):
        """OCR the configured image and report the URLs found in it.

        :param saved_state: Ignored; it is replaced by the current UTC time.
        :returns: Tuple of ``(saved_state, artifact_list)`` where
            ``saved_state`` is a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp and
            ``artifact_list`` holds a single ``Task`` artifact.
        :raises FileNotFoundError: If the image cannot be loaded or processed.
        """
        # strftime yields a fixed-width value; slicing isoformat() breaks
        # whenever the microsecond component happens to be zero.
        saved_state = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        try:
            ocr_input = self._preprocess(self._load_image())
        except cv2.error as err:
            raise FileNotFoundError("Unable to process image: {0}".format(self.img)) from err

        # Converts image data to a string
        img_data = pytesseract.image_to_string(ocr_input)

        # Extract once and reuse for both the description and the link.
        urls = list(iocextract.extract_urls(img_data))

        title = "Image: {0}".format(self.img)
        description = 'URL: {u}\nTask autogenerated by ThreatIngestor from source: {s}'
        description = description.format(s=self.name, u=urls)
        artifact = threatingestor.artifacts.Task(title, self.name, reference_link=str(urls), reference_text=description)

        return saved_state, [artifact]

    def _load_image(self):
        """Return the image as a BGR array, from the download or from disk."""
        if self._remote_bytes is not None:
            encoded = np.frombuffer(self._remote_bytes, dtype=np.uint8)
            # An empty download cannot be decoded; treat it like a missing image.
            data = cv2.imdecode(encoded, cv2.IMREAD_COLOR) if encoded.size else None
        else:
            data = cv2.imread(self.img)

        # OpenCV signals an unreadable or undecodable image by returning None.
        if data is None:
            raise FileNotFoundError("Unable to load image: {0}".format(self.img))

        return data

    @staticmethod
    def _preprocess(data):
        """Return a cleaned-up, inverted binary version of ``data`` for OCR."""
        # Helps with preprocessing by converting to a grayscale
        grayscale_img = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)

        # Creates a binary image. With THRESH_OTSU the threshold is chosen
        # automatically, so the 130 passed here is not the effective cut-off.
        binary_img = cv2.threshold(grayscale_img, 130, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

        # Inverts the binary
        invert_img = cv2.bitwise_not(binary_img)

        # Erode then dilate with the same small kernel to remove speckle noise
        kernel = np.ones((2, 2), np.uint8)
        cleaned = cv2.erode(invert_img, kernel, iterations=1)
        return cv2.dilate(cleaned, kernel, iterations=1)

0 comments on commit 126eb85

Please sign in to comment.