-
Notifications
You must be signed in to change notification settings - Fork 0
/
mda.sh
executable file
·55 lines (46 loc) · 2.58 KB
/
mda.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/bash
#read a new mail from STDIN
#store the mail in its full form under the MAILDIR folder
#then (archiveMail.sh step) separate the attachments and store them in a per-message folder (named after the Message-ID, or a UUID as fallback) inside an archive file structure
# Spool the mail arriving on STDIN into a uniquely named file below
# $MAILDIR/tmp. UUID is also the fallback message identifier used later on.
MAILDIR="/opt/SOLRTEST/maildir"
UUID=$(uuidgen)
if [[ -z "$UUID" ]]; then
  # Without an identifier the temp path would be the tmp directory itself.
  echo "${0##*/}: uuidgen returned no identifier" >&2
  exit 1
fi
TMPMAIL="$MAILDIR/tmp/$UUID" # unique temp file for this message
# If the mail cannot be written completely, stop with a failure status instead
# of carrying on to the final "exit 0"; presumably the calling mail retriever
# uses the exit status to decide whether the message was delivered.
if ! cat - > "$TMPMAIL"; then
  echo "${0##*/}: could not store incoming mail in $TMPMAIL" >&2
  rm -f -- "$TMPMAIL"
  exit 1
fi
#######################################
# Turn a raw Message-ID header value into a string that is safe to use as a
# single file/folder name.
# Arguments: $1 - raw header value (may contain <, >, whitespace, newlines)
# Outputs:   the cleaned identifier on stdout; empty when nothing usable is
#            left (the caller then falls back to the UUID)
#######################################
sanitize_message_id() {
  local id=$1
  # Remove <, >, all whitespace and control characters.
  id=${id//[[:space:][:cntrl:]<>]/}
  # The header comes from the mail itself: a '/' would let it point outside
  # the intended folder, so it is replaced.
  id=${id//\//_}
  # "." and ".." are directory references, not usable names.
  case "$id" in
    . | ..) id= ;;
  esac
  printf '%s' "$id"
}

#need to do this in 2 steps to avoid: "Delivery error (command mda.sh 10064 wrote to stderr: head: write error: Broken pipe"
MSGID=$(head -n 500 "$TMPMAIL" | formail -xMessage-Id) #extract Message-ID, regardless of upper or lowercase
MESSAGEID=$(sanitize_message_id "$MSGID")
if [[ -z "$MESSAGEID" ]]; then
  # No usable Message-ID in the mail: identify it by the generated UUID.
  MESSAGEID=$UUID
fi
# Move the spooled mail to its final place, named after the message identifier.
FILEPATH="$MAILDIR/new/$MESSAGEID"
if ! mv -- "$TMPMAIL" "$FILEPATH"; then
  # The mail is not where the rest of the script expects it; report failure
  # rather than continuing towards the final "exit 0".
  echo "${0##*/}: could not move $TMPMAIL to $FILEPATH" >&2
  exit 1
fi
#now we have in a single file $FILEPATH the full mail, MIME encoded
# Unpack the stored MIME message into $ARCHIVE/<year>/<month>/<message id>.
YEARMONTH=$(date +"%Y/%m")
ARCHIVE="/opt/SOLRTEST/mailarchive"
# ripmime receives the MIME message on stdin and overwrites existing files if
# the same message is processed twice (the folder is unique per message ID).
# Kept as an array so every option stays one word; the destination folder is
# appended right after -d.
MIMETOOL=(ripmime -i - --overwrite -e -d)
ARCHIVEDEST="$ARCHIVE/$YEARMONTH/$MESSAGEID"
if ! mkdir -p -- "$ARCHIVEDEST"; then
  echo "${0##*/}: could not create archive folder $ARCHIVEDEST" >&2
  exit 1
fi
#let ripmime chop the MIME file into multiple files with attachments
"${MIMETOOL[@]}" "$ARCHIVEDEST" < "$FILEPATH" #will put files in -d $ARCHIVEDEST
#rm $FILEPATH #remove original MIME file
TIKAJAR="/opt/tika/tika-app-1.3.jar"

#######################################
# Clean up an unpacked mail folder and run Tika on its attachments.
# Empty files are deleted (ripmime problem?). Every other file, except
# "_headers_", "textfile*" and earlier "*_tikaxml" results, is passed to
# "java -jar <jar> -x" and the output is written next to it as
# "<file>_tikaxml".
# Arguments: $1 - folder to process
#            $2 - path to the Tika application jar
# Outputs:   one "<file> is going to Tika" line per processed file on stdout
# Returns:   0; a missing or empty folder argument is silently ignored
#######################################
tika_extract_dir() {
  local dir=$1 jar=$2
  local f filename
  local -a files=()

  # With an empty argument find would scan the current directory instead.
  [[ -n "$dir" && -d "$dir" ]] || return 0

  # List the files first, so the *_tikaxml files created below cannot show up
  # in the listing while it is still being produced. NUL-delimited with
  # IFS= / -r keeps names with spaces or backslashes intact.
  while IFS= read -r -d '' f; do
    files+=("$f")
  done < <(find "$dir" -type f -print0)

  for f in "${files[@]}"; do
    if [[ ! -s "$f" ]]; then
      # Empty file: remove it and do not hand the now missing file to Tika
      # (that would only leave an empty "<file>_tikaxml" behind).
      rm -f -- "$f"
      continue
    fi
    filename=${f##*/}
    if [[ $filename != _headers_ && $filename != textfile* && $filename != *_tikaxml ]]; then
      printf '%s is going to Tika\n' "$f"
      # Tika's stderr is discarded on purpose, as in the original script.
      java -jar "$jar" -x "$f" > "${f}_tikaxml" 2>/dev/null
    fi
  done
  return 0
}

tika_extract_dir "$ARCHIVEDEST" "$TIKAJAR"
#now send the files to SOLR
SOLRURL="http://liferay-hydroqc-cluster1.mtllab.sfl:8983/solr/mail/upload"
ARCHIVEURL="http://archiveserver/$YEARMONTH/$MESSAGEID"
#SOLRCELL CAN ACCEPT MIME FILES AND EXTRACT THE DATA. WE JUST HAVE TO MAP SOME VALUES, AND ADD OTHER INFO (like links to the archived attachments) AFTERWARDS, USING MESSAGEID
#PROBLEM: we have to map the tag <div class="email-entry"> to an attachment
#curl "http://localhost:8983/solr/mail/update/extract?literal.messageId=$MESSAGEID&commit=true&fmap.content=attachment&capture=meta&fmap.meta=ignored_meta&fmap.Message-From=from&fmap.Creation-Date=sentDate&" -F "myfile=@$FILEPATH" -v
# Every value is quoted: the message ID comes from the mail, and unquoted it
# would be word-split and glob-expanded before reaching the uploader.
/opt/SOLRTEST/sendMimefolderToSOLR.py \
  --folder "$ARCHIVEDEST" \
  --messageid "$MESSAGEID" \
  --solrURL "$SOLRURL" \
  --archiveURL "$ARCHIVEURL"
#exit OK regardless of the upload result, probably getmail will remove the mail from IMAP
exit 0