-
Notifications
You must be signed in to change notification settings - Fork 1
/
ocrwm
executable file
·195 lines (163 loc) · 5.63 KB
/
ocrwm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/bin/bash
# ocrwm - batch helper for a folder of scanned images. Depending on the flags
# it makes PNG copies (ImageMagick convert), runs OCR on them (tesseract),
# stamps a text watermark on them (convert + composite) and bundles the copies
# into a single PDF.

# Abort on any reference to an unset variable.
set -u

########################################
# Set these variables
########################################
# Set the default text for the watermark
label='Copyright ©2013 KZ Gedenkstätte Neuengamme'
# Download a free font from Google Fonts
# http://www.google.com/fonts#UsePlace:use/Collection:Gudea
font='/Users/aes9h/Downloads/fonts/Gudea/Gudea-Regular.ttf'

########################################
# No need to edit past this point
########################################
# Locate the external tools. `command -v` is the portable replacement for
# `which`. If a tool is not installed we fall back to its bare name: the later
# call then fails with a clear "command not found" instead of expanding to
# nothing, which would make the shell try to execute the first argument
# (an image path) as the command.
CONV="$(command -v convert || echo convert)"
COMP="$(command -v composite || echo composite)"
IDEN="$(command -v identify || echo identify)"
TESS="$(command -v tesseract || echo tesseract)"

# Set colors for echo. The values hold a literal "\x1b[" prefix, so they only
# turn into real escape sequences when printed with `echo -e`.
ESQ="\x1b["
red="${ESQ}1;31;40m"
blue="${ESQ}1;34;40m"
yellow="${ESQ}33;40m"
green="${ESQ}32;40m"
magenta="${ESQ}35;40m"
# Sequence that switches all attributes off again. When tput is unavailable or
# the terminal type is unknown, use the plain ANSI reset so colored output does
# not bleed into the following lines.
reset="$(tput sgr0 2>/dev/null)"
[[ -n "$reset" ]] || reset="${ESQ}0m"
# Synopsis printed when an option cannot be parsed.
USAGE='ocrwm [-copw] [-l "text"] [/path/to/folder]'

# Translate the command-line flags into the switches read further down:
#   -c         sets NOCOPY     -o  sets OCR
#   -l "text"  sets LABEL      -p  sets PDF
#                              -w  sets WATERMARK
# Anything getopts does not recognise prints the synopsis and exits with 1.
while getopts "cl:opw" flag; do
  case "$flag" in
    c) NOCOPY="true" ;;
    l) LABEL="$OPTARG" ;;
    o) OCR="true" ;;
    p) PDF="true" ;;
    w) WATERMARK="true" ;;
    *)
      echo -e "$USAGE"
      exit 1
      ;;
  esac
done

# Drop the parsed options so that $1 is the optional folder argument.
shift $((OPTIND - 1))
# Watermark text: a non-empty LABEL (from -l) takes precedence, otherwise the
# default configured in $label is used.
setlabel="${LABEL:-$label}"

# Font option for convert: only produced when the configured font file really
# exists; otherwise it stays empty and no -font option is passed at all.
setfont=''
if [[ -f "$font" ]]; then
  setfont="-font $font"
fi
# Get the working directory: the folder given as first argument, or the
# current directory when none was given. $path always ends up holding the
# absolute path of the directory being processed.
if [[ -n "${1:-}" ]]; then
  # Stop right here when the folder cannot be entered; carrying on would
  # silently process whatever directory the script was started from.
  if ! cd -- "$1"; then
    printf 'ocrwm: cannot change to directory: %s\n' "$1" >&2
    exit 1
  fi
fi
path=$(pwd)
# Create the output folders (relative to the current directory) that the
# selected steps write into.

# copies/: made unless -c was given; with -c the user is reminded that the
# later steps expect the PNG copies to be there already.
if [[ "${NOCOPY:-}" == "true" ]]; then
  echo -e "${red}Please note, OCR and Watermark assumes that a copy has been made in the png format and is available in the 'copies' directory.${reset}"
else
  [[ -d copies ]] || mkdir -p copies
fi

# marked/: only needed for the watermark step.
if [[ "${WATERMARK:-}" == "true" ]]; then
  [[ -d marked ]] || mkdir -p marked
fi

# ocr/: only needed for the OCR step.
if [[ "${OCR:-}" == "true" ]]; then
  [[ -d ocr ]] || mkdir -p ocr
fi
# Process every image that sits directly inside $path (no recursion). For each
# file: make a PNG copy, optionally OCR the copy, optionally watermark it.
#
# Use find piped to while with IFS to account for names with spaces and other
# characters that would break in a for loop. Putting IFS in a while loop means
# we don't have to worry about it being unset/reset.
#
#   -maxdepth 1   restricts the search to the folder itself and is understood
#                 by both BSD and GNU find (the BSD-only "-depth 1" makes GNU
#                 find abort before it lists anything).
#   '*.tif*'      the patterns are single-quoted so find receives a real
#                 wildcard. A backslash inside double quotes is kept as-is,
#                 which would make find look for a literal "*" and skip every
#                 TIFF file.
#   read -r       do not treat backslashes in file names specially
#   read -d ''    records are NUL delimited (matches find -print0)
find "$path" -maxdepth 1 -type f \
  \( -iname '*.jpg' -o -iname '*.jpeg' \
  -o -iname '*.png' -o -iname '*.gif' \
  -o -iname '*.tif*' \) \
  -print0 | while IFS= read -r -d '' file; do
  # ## deletes longest match from front of variable
  # %  deletes shortest match from end of variable
  fullname="${file##*/}"      # file name without the directory part
  extension="${fullname##*.}" # text after the last dot
  filename="${fullname%.*}"   # file name without its extension
  newname="${filename}.png"   # name of the PNG copy

  if [[ -f "$file" ]]; then
    ########################################################################
    # create a copy in png form in the copy directory
    ########################################################################
    if [[ "true" == "${NOCOPY:-}" ]]; then
      echo -e "${red}Skipping making a copy of $fullname.${reset}"
    else
      echo -e "Making copy of ${blue}$fullname${reset}."
      if [[ "png" == "$extension" ]]; then
        # Already a PNG: a plain copy is enough.
        cp "$path/$fullname" "$path/copies/"
      else
        "$CONV" "$path/$fullname" "$path/copies/$newname"
      fi
    fi

    ########################################################################
    # OCR the png and put in ocr directory
    ########################################################################
    if [[ "true" == "${OCR:-}" ]]; then
      echo -e "Running OCR on ${yellow}$newname${reset}."
      # command  input  output(.txt)  language=German
      "$TESS" "$path/copies/$newname" "$path/ocr/$filename" -l deu
    fi

    ########################################################################
    # Add watermark
    ########################################################################
    if [[ "true" == "${WATERMARK:-}" ]]; then
      echo -e "Putting water mark on ${green}$newname${reset}."
      width=$("$IDEN" -format %w "$path/copies/$newname")
      # identify prints nothing usable when the copy is missing or unreadable.
      # Feeding that into $(( )) is a syntax error that would abort the whole
      # loop, so skip just this one file instead.
      if [[ "$width" =~ ^[0-9]+$ ]]; then
        # height=$($IDEN -format %h "$path/copies/$newname")
        # w=$((width * 30))
        # The label is rendered half as wide as the image.
        s=$((width / 2))
        # h=$((s+$s))
        # p=$((height/35))
        # Earlier variants, kept for reference:
        # Add black text in gold box under image
        # convert \
        #   $1 \
        #   -background Gold \
        #   -pointsize $s \
        #   -font $font \
        #   -gravity south \
        #   -splice 0x$h \
        #   -annotate +0+22 "$label" \
        #   marked/$1
        # $CONV -background '#0008' -fill white -gravity center -size $w \
        #   caption:"$label" \
        #   dragon.gif +swap -gravity south -composite anno_caption.jpg

        # Overlay white text on dark grey transparent background: convert
        # renders the label as a MIFF image on stdout and composite places it
        # at the bottom centre of the copy, writing the result to marked/.
        # $setfont is left unquoted on purpose: it is either empty or the two
        # words "-font <file>".
        # shellcheck disable=SC2086
        "$CONV" \
          -background '#00000080' \
          -fill white \
          -size "$s" \
          $setfont \
          label:"$setlabel" \
          miff:- |
          "$COMP" \
            -gravity south \
            -geometry +0+3 \
            - "$path/copies/$newname" "$path/marked/$newname"
      else
        echo -e "${red}Could not read the width of $newname; skipping watermark.${reset}" >&2
      fi
    fi # end water mark section
  fi
  echo
done
########################################################################
# Compile into one pdf
########################################################################
# Runs only when -p was given. The glob is inside double quotes, so the shell
# hands the pattern to convert unexpanded (presumably ImageMagick expands it
# itself -- confirm). The result is written to combined.pdf inside $path.
case "${PDF:-}" in
  true)
    echo -e "${magenta}Consolidating images into one pdf.${reset}"
    $CONV "$path/copies/*.png" "$path/combined.pdf"
    ;;
esac