-
Notifications
You must be signed in to change notification settings - Fork 5
/
Dockerfile.base
93 lines (84 loc) · 2.92 KB
/
Dockerfile.base
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
FROM python:3.9-bookworm
ENV PYTHONUNBUFFERED 1
ENV DEBIAN_FRONTEND=noninteractive
RUN set -e \
&& echo 'deb http://deb.debian.org/debian bookworm non-free' >> /etc/apt/sources.list \
&& ( apt-get update -y || sleep 15 && apt-get update -y) \
&& apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-numpy \
python3-icu \
libicu-dev \
pkg-config \
build-essential \
p7zip-full p7zip-rar \
cpanminus \
libgsf-1-dev \
postgresql-client \
imagemagick \
tesseract-ocr \
tesseract-ocr-all \
libtiff-tools \
ghostscript qpdf \
wget curl git file \
gpg gpg-agent \
gosu tini \
s3fs pdftk poppler-utils ghostscript nodejs npm qpdf \
davfs2 \
cuneiform \
&& cpanm --notest Email::Outlook::Message \
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
&& pip3 install --upgrade pip \
&& pip3 install pipenv
# disable tesseract multithreading
ENV OMP_THREAD_LIMIT 1
# download libpst
ENV LIBPST_VERSION libpst-0.6.76
ENV LIBPST_URL https://www.five-ten-sg.com/libpst/packages/$LIBPST_VERSION.tar.gz
# ENV LIBPST_URL https://github.com/liquidinvestigations/snoop-deps/raw/master/$LIBPST_VERSION.tar.gz
RUN wget $LIBPST_URL --progress=dot:giga \
&& tar zxvf $LIBPST_VERSION.tar.gz \
&& rm -f $LIBPST_VERSION.tar.gz \
&& mv $LIBPST_VERSION /opt/libpst \
&& cd /opt/libpst \
&& ./configure --disable-python --prefix="`pwd`" \
&& make \
&& make install
ENV PATH="/opt/libpst/bin:${PATH}"
# download recent version of 7z
ENV SEVEN_Z_VERSION 7z2107-linux-x64
RUN wget https://github.com/liquidinvestigations/snoop-deps/raw/master/$SEVEN_Z_VERSION.tar.xz --progress=dot:giga \
&& tar xvf $SEVEN_Z_VERSION.tar.xz --one-top-level=7z \
&& rm -f $SEVEN_Z_VERSION.tar.xz \
&& mkdir /opt/7z \
&& mv 7z/7zz /opt/7z \
&& rm -r 7z
ENV PATH="/opt/7z:${PATH}"
# install pdf2pdfocr
RUN pip3 install --upgrade Pillow reportlab packaging psutil Pillow reportlab \
&& pip3 install --upgrade lxml beautifulsoup4 \
&& pip3 install --upgrade wheel \
&& pip3 install PyPDF2==1.27.12
RUN rm /etc/ImageMagick-6/policy.xml
RUN git clone https://github.com/liquidinvestigations/pdf2pdfocr --branch master2 /opt/pdf2pdfocr \
&& cd /opt/pdf2pdfocr \
&& ./install_command
# download others
ADD https://github.com/liquidinvestigations/snoop-deps/raw/master/wait_v2.3.0 /wait
RUN chmod +x /wait
# Download & Install TINI
ADD https://github.com/liquidinvestigations/snoop-deps/raw/master/tini_v0.19.0 /tini
RUN chmod +x /tini
# extra python libs
RUN pip install -e 'git+https://github.com/liquidinvestigations/pycld2.git#egg=pycld2'
# install PDF tools
RUN mkdir -p /opt/hoover/snoop/pdf-tools
ADD ./pdf-tools /opt/hoover/snoop/pdf-tools
RUN bash /opt/hoover/snoop/pdf-tools/install.sh
# install snoop libs
RUN mkdir -p /opt/hoover/snoop
WORKDIR /opt/hoover/snoop
ADD Pipfile Pipfile.lock ./
RUN pipenv install --system --deploy --ignore-pipfile