forked from hishamhm/usercount
-
Notifications
You must be signed in to change notification settings - Fork 3
/
crawler.sh
executable file
·36 lines (28 loc) · 1.37 KB
/
crawler.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# Environment setup for the crawler run.
#
# Switches to the directory containing this script, pins the time zone and
# Python hash seed for the processes started later, byte-compiles the Python
# sources in that directory and copies the compiled modules next to the
# sources under fixed names (common.pyc, crawler.pyc).

# Resolve the script's own directory; everything below uses relative paths,
# so refuse to continue if it cannot be determined or entered.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "crawler.sh: cannot determine script directory" >&2
  exit 1
}
cd "$script_dir" || {
  echo "crawler.sh: cannot cd to $script_dir" >&2
  exit 1
}

export TZ=Europe/Budapest
export PYTHONHASHSEED=42

# -l: compile only this directory, do not recurse into subdirectories.
python3 -m compileall -l . > /dev/null

# NOTE(review): each glob is assumed to match exactly one file; with .pyc
# files from several Python versions in __pycache__ the cp would fail.
cp __pycache__/common.cpython*.pyc common.pyc
cp __pycache__/crawler.cpython*.pyc crawler.pyc
# Run the crawler, retrying on failure for up to 20 minutes.
#
# A run counts as successful when the last line of mastostats.csv differs
# from what it was before the loop started; otherwise an error line is
# logged and the crawler is started again, as long as less than 1200 seconds
# have passed since STARTTS. All command-line arguments of this script are
# forwarded unchanged to crawler.pyc.
#
# Sets: EXECCOUNT, STARTTS (read again when the script logs its run time),
#       MASTO1, MASTO2.

# Pull the (possibly negative) number following "execcount" out of the
# snapshot; used only for the start-up log line.
EXECCOUNT=$(grep -oP "execcount[^\-0-9]*\K[\-0-9]*" snapshot.json)
printf '%s + Crawler started with execcount %s\n' \
  "$(date +"%Y-%m-%d %H:%M:%S")" "$EXECCOUNT" \
  | tee -a crawler.log

STARTTS=$(date +'%s')
MASTO1=$(tail -n 1 mastostats.csv)

while (( $(date +%s) - STARTTS < 1200 )); do
  # "$@" keeps each argument intact (spaces, glob characters).
  # stdbuf -o L line-buffers stdout so the log is written as lines arrive.
  stdbuf -o L python3 crawler.pyc "$@" 2>&1 | tee -a crawler.log

  MASTO2=$(tail -n 1 mastostats.csv)
  if [[ "$MASTO1" != "$MASTO2" ]]; then
    # The stats file gained a new last line: this run produced data.
    break
  fi

  # NOTE(review): there is no delay before the retry; a crawler that fails
  # immediately will be restarted back-to-back until the window closes.
  printf '%s !!! Crawler err\n' "$(date +"%Y-%m-%d %H:%M:%S")" \
    | tee -a crawler.log
done
# Post-run log maintenance.
#
# 1. Trim crawler.log to its last LOGLINES lines (value taken from the
#    "loglines" entry of config.txt when present, 9998 otherwise).
# 2. Rebuild crawler.err from the trimmed log, keeping only lines that look
#    like problems.
# 3. Append a "finished" line with the elapsed time since STARTTS.

# The value is the text between the first pair of double quotes after the
# first ':' on the line containing "loglines".
if [ -f "config.txt" ]; then
  LOGLINES=$(grep loglines "config.txt" | cut -f2 -d":" | cut -f2 -d"\"")
fi
if [ -z "$LOGLINES" ]; then
  LOGLINES="9998"
fi

# Replace the log only if tail succeeded; otherwise a bad LOGLINES value
# would leave an empty temp file that overwrites the whole log.
tail -n "$LOGLINES" crawler.log > crawler.log.temp \
  && mv crawler.log.temp crawler.log

# Error extract, processed newest-first and flipped back at the end:
#   - drop lines containing "+" and a few known harmless messages,
#   - rev | cut | rev keeps only the text after the last carriage return,
#   - drop "N of M done" progress lines,
#   - uniq -f 2 collapses adjacent lines that differ only in their first two
#     fields; because the input is reversed, the newest of each run is kept.
tac crawler.log \
  | grep --text -v "+" \
  | grep -v "No time for crawl" \
  | grep -v "[Nn]o more time left" \
  | grep -v Shrinking \
  | rev \
  | cut -d$'\r' -f 1 \
  | rev \
  | grep -E -v '[0-9]+ of [0-9]+ done' \
  | uniq -f 2 \
  | tac > crawler.err

# Elapsed time as MM:SS, computed arithmetically so the minutes do not wrap
# around after one hour.
RUNNING_TIME=$(( $(date '+%s') - STARTTS ))
printf '%s + Crawler finished in %02d:%02d\n' \
  "$(date +"%Y-%m-%d %H:%M:%S")" \
  "$(( RUNNING_TIME / 60 ))" "$(( RUNNING_TIME % 60 ))" \
  | tee -a crawler.log