-
Notifications
You must be signed in to change notification settings - Fork 4
/
tpch.sh
executable file
·191 lines (119 loc) · 4.84 KB
/
tpch.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/bin/sh
RESULTS=$1
HOST=$2
DBNAME=$3
USER=$4
PWD=$5
# delay between stats collections (iostat, vmstat, ...)
DELAY=15
# DSS queries timeout (5 minutes or something like that)
DSS_TIMEOUT=300000 # 5 minutes in seconds
# log
LOGFILE=bench.log
function benchmark_run() {
mkdir -p $RESULTS
# store the settings
psql -h $HOST -U $USER postgres -c "select name,setting from pg_settings" > $RESULTS/settings.log 2> $RESULTS/settings.err
print_log "preparing TPC-H database"
# create database, populate it with data and set up foreign keys
# psql -h $HOST tpch < dss/tpch-create.sql > $RESULTS/create.log 2> $RESULTS/create.err
print_log " loading data"
psql -h $HOST -U $USER $DBNAME < dss/tpch-load.sql > $RESULTS/load.log 2> $RESULTS/load.err
print_log " creating primary keys"
psql -h $HOST -U $USER $DBNAME < dss/tpch-pkeys.sql > $RESULTS/pkeys.log 2> $RESULTS/pkeys.err
print_log " creating foreign keys"
psql -h $HOST -U $USER $DBNAME < dss/tpch-alter.sql > $RESULTS/alter.log 2> $RESULTS/alter.err
print_log " creating indexes"
psql -h $HOST -U $USER $DBNAME < dss/tpch-index.sql > $RESULTS/index.log 2> $RESULTS/index.err
print_log " analyzing"
psql -h $HOST -U $USER $DBNAME < dss/tpch-analyze.sql > $RESULTS/analyze.log 2> $RESULTS/analyze.err
print_log "running TPC-H benchmark"
benchmark_dss $RESULTS
print_log "finished TPC-H benchmark"
}
function benchmark_dss() {
mkdir -p $RESULTS
mkdir $RESULTS/vmstat-s $RESULTS/vmstat-d $RESULTS/explain $RESULTS/results $RESULTS/errors
# get bgwriter stats
psql -h $HOST -U $USER postgres -c "SELECT * FROM pg_stat_bgwriter" > $RESULTS/stats-before.log 2>> $RESULTS/stats-before.err
psql -h $HOST -U $USER postgres -c "SELECT * FROM pg_stat_database WHERE datname = '$DBNAME'" >> $RESULTS/stats-before.log 2>> $RESULTS/stats-before.err
vmstat -s > $RESULTS/vmstat-s-before.log 2>&1
vmstat -d > $RESULTS/vmstat-d-before.log 2>&1
print_log "running queries defined in TPC-H benchmark"
for n in `seq 1 22`
do
q="dss/queries/$n.sql"
qe="dss/queries/$n.explain.sql"
if [ -f "$q" ]; then
print_log " running query $n"
echo "======= query $n =======" >> $RESULTS/data.log 2>&1;
# run explain
psql -h $HOST -U $USER $DBNAME < $qe > $RESULTS/explain/$n 2>> $RESULTS/explain.err
vmstat -s > $RESULTS/vmstat-s/before-$n.log 2>&1
vmstat -d > $RESULTS/vmstat-d/before-$n.log 2>&1
# run the query on background
/usr/bin/time -a -f "$n = %e" -o $RESULTS/results.log psql -h $HOST -U $USER $DBNAME < $q > $RESULTS/results/$n 2> $RESULTS/errors/$n &
# wait up to the given number of seconds, then terminate the query if still running (don't wait for too long)
for i in `seq 0 $DSS_TIMEOUT`
do
# the query is still running - check the time
if [ -d "/proc/$!" ]; then
# the time is over, kill it with fire!
if [ $i -eq $DSS_TIMEOUT ]; then
print_log " killing query $n (timeout)"
# echo "$q : timeout" >> $RESULTS/results.log
psql -h $HOST -U $USER postgres -c "SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE datname = 'tpch'" >> $RESULTS/queries.err 2>&1;
# time to do a cleanup
sleep 10;
# just check how many backends are there (should be 0)
psql -h $HOST -U $USER postgres -c "SELECT COUNT(*) AS tpch_backends FROM pg_stat_activity WHERE datname = 'tpch'" >> $RESULTS/queries.err 2>&1;
else
# the query is still running and we have time left, sleep another second
sleep 1;
fi;
else
# the query finished in time, do not wait anymore
print_log " query $n finished OK ($i seconds)"
break;
fi;
done;
vmstat -s > $RESULTS/vmstat-s/after-$n.log 2>&1
vmstat -d > $RESULTS/vmstat-d/after-$n.log 2>&1
fi;
done;
# collect stats again
psql -h $HOST -U $USER postgres -c "SELECT * FROM pg_stat_bgwriter" > $RESULTS/stats-after.log 2>> $RESULTS/stats-after.err
psql -h $HOST -U $USER postgres -c "SELECT * FROM pg_stat_database WHERE datname = '$DBNAME'" >> $RESULTS/stats-after.log 2>> $RESULTS/stats-after.err
vmstat -s > $RESULTS/vmstat-s-after.log 2>&1
vmstat -d > $RESULTS/vmstat-d-after.log 2>&1
}
function stat_collection_start()
{
local RESULTS=$1
# run some basic monitoring tools (iotop, iostat, vmstat)
for dev in $DEVICES
do
iostat -t -x /dev/$dev $DELAY >> $RESULTS/iostat.$dev.log &
done;
vmstat $DELAY >> $RESULTS/vmstat.log &
}
function stat_collection_stop()
{
# wait to get a complete log from iostat etc. and then kill them
sleep $DELAY
for p in `jobs -p`; do
kill $p;
done;
}
function print_log() {
local message=$1
echo `date +"%Y-%m-%d %H:%M:%S"` "["`date +%s`"] : $message" >> $RESULTS/$LOGFILE;
}
mkdir $RESULTS;
export PGPASSWORD=$PWD
# start statistics collection
stat_collection_start $RESULTS
# run the benchmark
benchmark_run $RESULTS $DBNAME $USER
# stop statistics collection
stat_collection_stop