#!/bin/bash
source ./config
source ./params
# Take clients and rate limit from the command line if passed; use defaults
# otherwise.
# Clients
CLIENTS=4
if [ -n "$1" ]; then
CLIENTS="$1"
fi
# Rate limit
if [ -n "$2" ]; then
RATE="$2"
RATEOPT="-R $2 "
else
RATE="NULL"
RATEOPT=""
fi
# Client limit
CLIENT_LIMIT=0
if [ -n "$3" ]; then
CLIENT_LIMIT="$3"
fi
# Latency limit
if [ -n "$4" ]; then
LAT="$4"
LATOPT="-L $4 "
else
LAT="NULL"
LATOPT=""
fi
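# Example invocations (illustrative values only):
#   ./benchwarmer                # defaults: 4 clients, no limits
#   ./benchwarmer 8              # 8 clients
#   ./benchwarmer 8 2000 0 50    # 8 clients, rate limit "-R 2000",
#                                # no client limit, latency limit "-L 50"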
function detect_cpus {
# getconf should work on Linux and more recent Macs. Try the "online" processor
# count first.
if [ -z "$CPUS" ]; then
CPUS=`getconf _NPROCESSORS_ONLN 2>/dev/null`
fi
# This will work on older Macs and possibly other BSD systems, but is expected
# to fail on Linux.
if [ -z "$CPUS" ]; then
CPUS=`sysctl -n hw.ncpu 2>/dev/null`
fi
# Newer Macs dropped the ncpu sysctl in favor of ones that break out logical
# and physical cores.
if [ -z "$CPUS" ]; then
CPUS=`sysctl -n hw.logicalcpu 2>/dev/null`
fi
# Try nproc from GNU coreutils, available on newer Linux and via Homebrew.
if [ -z "$CPUS" ]; then
CPUS=`nproc 2>/dev/null`
fi
# Fall back to the getconf total processor count if the online one wasn't
# available. That's not always what we want, but we shouldn't even reach
# this point.
if [ -z "$CPUS" ]; then
CPUS=`getconf _NPROCESSORS_CONF 2>/dev/null`
fi
# TODO Test whether any of these CPU counters work on FreeBSD and Solaris
if [ -z "$CPUS" ]; then
# If there's no more accurate guess, assuming 4 CPUs works well enough
# on typical hardware circa 2020.
CPUS=4
fi
}
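# Example: on a typical 8-core Linux host, `getconf _NPROCESSORS_ONLN`
# prints 8, so detect_cpus sets CPUS=8 and none of the fallbacks run:
#   detect_cpus
#   echo $CPUS    # -> 8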
# Query the test database and put its background writer statistics into
# a set of environment variables, mainly for use when inserting those
# stats into the results database.
function get_bgwriter {
BACKEND_FSYNC="buffers_backend_fsync"
if [ "$SERVER_VERSION" -lt "900200" ]; then
BACKEND_FSYNC="0"
fi
if [ "$SERVER_VERSION" -ge "170000" ]; then
BGW=`$TESTPSQL -A -t -F" " -c "SELECT c.num_timed AS checkpoints_timed, c.num_requested AS checkpoints_req, c.buffers_written AS buffers_checkpoint, b.buffers_clean, b.maxwritten_clean, b.buffers_alloc, 0 AS buffers_backend, 0 AS buffers_backend_fsync FROM pg_catalog.pg_stat_checkpointer c,pg_catalog.pg_stat_bgwriter b;"`
else
BGW=`$TESTPSQL -A -t -F" " -c "select checkpoints_timed,checkpoints_req,buffers_checkpoint,buffers_clean,maxwritten_clean,buffers_backend,buffers_alloc,$BACKEND_FSYNC from pg_stat_bgwriter"`
fi
set -- $BGW
checkpoints_timed=$1
checkpoints_req=$2
buffers_checkpoint=$3
buffers_clean=$4
maxwritten_clean=$5
buffers_backend=$6
buffers_alloc=$7
buffers_backend_fsync=$8
}
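# Example: given the space-separated query output "12 3 4096 250 7 1800 9000 0"
# (illustrative numbers), get_bgwriter sets checkpoints_timed=12,
# checkpoints_req=3, buffers_checkpoint=4096, and so on through
# buffers_backend_fsync=0.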
# Figure out how many transactions per client, then recompute
# a new total; this copes with rounding issues
if [ -z "$TOTTRANS" ] ; then
TOTTRANS=1000
fi
TRANS=`expr $TOTTRANS / $CLIENTS`
TOTTRANS=`expr $TRANS \* $CLIENTS`
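# Example: with the default TOTTRANS=1000 and CLIENTS=3, TRANS becomes
# 1000 / 3 = 333 and TOTTRANS is recomputed as 333 * 3 = 999.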
# If a runtime has been specified instead, throw away the transaction counts
# and use the runtime instead.
if [ -n "$RUNTIME" ] ; then
TESTLEN="-T $RUNTIME"
TOTTRANS=""
else
TESTLEN="-t $TRANS"
fi
if [ -z "$TRIM" ] ; then
TRIM=0
fi
# Pass through prepared statements option
PREPAREDOPT=""
if [ -z "$PREPARED" ] ; then
PREPARED=0
fi
if [ "$PREPARED" -gt 0 ] ; then
PREPAREDOPT="-M prepared"
fi
# Set MAX_WORKERS automatically on Linux, OS X, and maybe other platforms
if [ "$MAX_WORKERS" == "auto" ] ; then
detect_cpus
if [ -z "$CPUS" ] ; then
error 16 "Bug in detect_cpus function, returned a null value"
fi
MAX_WORKERS="$CPUS"
echo Workers using $MAX_WORKERS CPUs
fi
# Set WORKERS string so that the largest possible worker count
# up to MAX_WORKERS is used, while still dividing CLIENTS into an
# even number per worker.
WORKERS=""
NUM_WORKERS="1"
if [ -n "$MAX_WORKERS" ] ; then
# Only bother with/allow adjustment to WORKERS if the max is >1.
# That keeps us out of trouble when using a pgbench before 9.0,
# where no value for "-j" is allowed, as long as the
# config file we're given isn't set up incorrectly.
if [ "$MAX_WORKERS" -gt 1 ]; then
NUM_WORKERS=$MAX_WORKERS
while [ "$NUM_WORKERS" -gt 1 ]; do
(( remainder=$CLIENTS % $NUM_WORKERS ))
if [ $remainder -eq 0 ] ; then
break
fi
(( NUM_WORKERS = $NUM_WORKERS - 1 ))
done
WORKERS="-j ${NUM_WORKERS}"
fi
fi
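# Example: with MAX_WORKERS=8 and CLIENTS=12, the loop tries 8 (12 % 8 = 4),
# then 7 (12 % 7 = 5), then 6 (12 % 6 = 0), so WORKERS becomes "-j 6".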
# Setting SOCKETS to 1 avoids host IP networking in favor of a socket connection.
# Only useful when running this script on the database server itself for local testing.
# TODO Allow specifying the location of the socket file.
TESTHOSTPARAM="-h $TESTHOST"
if [ -n "$SOCKETS" ]; then
if [ "$SOCKETS" -eq "1" ]; then
TESTHOSTPARAM=""
fi
fi
# psql statements for the test database and the result database
TESTPSQL="psql $TESTHOSTPARAM -U $TESTUSER -p $TESTPORT -d $TESTDB"
RESULTPSQL="psql -h $RESULTHOST -U $RESULTUSER -p $RESULTPORT -d $RESULTDB"
echo Running tests using: $TESTPSQL
echo Storing results using: $RESULTPSQL
# See if this database has all the standard pgbench tables in it
PGCOUNT=`$TESTPSQL -A -t -c "SELECT count(*) FROM pg_stat_user_tables WHERE relname IN ('pgbench_history','pgbench_tellers','pgbench_accounts','pgbench_branches')"`
if [ "$PGCOUNT" -eq 4 ] ; then
TABLEPREFIX="pgbench_"
PGBENCH_TABLES=1
echo Found standard pgbench tables with prefix=$TABLEPREFIX
else
TABLEPREFIX=""
PGCOUNT=`$TESTPSQL -A -t -c "SELECT count(*) FROM pg_stat_user_tables WHERE relname IN ('history','tellers','accounts','branches')"`
if [ "$PGCOUNT" -eq 4 ] ; then
echo Found standard pgbench tables with prefix=$TABLEPREFIX
PGBENCH_TABLES=1
else
echo Did not find standard pgbench tables
PGBENCH_TABLES=0
fi
fi
# Determine database scale. Allow overriding it in the config file.
# That's needed to run the pgbench init script. It can't be an empty string.
if [ -z "$SCALE" ] ; then
if [ "$PGBENCH_TABLES" -eq "1" ] ; then
SCALE=`$TESTPSQL -A -t -c "select count(*) from ${TABLEPREFIX}branches"`
fi
fi
if [ -z "$SCALE" ] ; then
SCALE="0"
fi
# Confirm we have a useful pgbench to run
if [ -z "$PGBENCHBIN" ] || [ ! -x "$PGBENCHBIN" ]; then
echo ERROR: cannot find pgbench binary $PGBENCHBIN, aborting
exit 1
fi
# Find current test set
SET=`$RESULTPSQL -A -t -c "select max(set) from testset"`
if [ "$?" -ne "0" ]; then
echo ERROR: Attempt to determine test set failed
# TODO Create a fake first set if this happens? Right now,
# the likely case is that the test DDL was never added, which
# makes that sort of correction attempt unlikely to be useful
exit 1
fi
# Server version
SERVER_VERSION=`$TESTPSQL -A -t -F" " -c "select current_setting('server_version_num')::numeric;"`
VERSION_TEXT=`$TESTPSQL -Atc "select version();"`
# Cleanup pgbench tables, unless we've been told to skip that
if [ "$SKIPINIT" -ne "1" ]; then
echo Cleaning up database $TESTDB
if [ "$PGBENCH_TABLES" -eq "1" ] ; then
$TESTPSQL -c "truncate table ${TABLEPREFIX}history"
fi
# If the previous test ran recently and it did a cleanup, we should
# be safe to skip this possibly lengthy step. "Recently" is defined
# as within 100 seconds of when the last test cleanup finished. There
# is some margin for error here, particularly if processing the results
# latency log was very time consuming. The price of making a mistake
# is just some extra run-time though, doing a redundant vacuum. At
# database sizes where the vacuum time is very long, it's actually less
# likely that the results log is large, since tests against large
# databases tend to run slowly.
CLEANED=`$RESULTPSQL -A -t -c "SELECT EXISTS(SELECT 1 FROM tests WHERE server='${SERVERNAME}' AND cleanup IS NOT NULL AND ((now() - end_time) + cleanup) < '100 seconds'::interval ORDER BY test DESC LIMIT 1)"`
if [ "$CLEANED" = 'f' ] ; then
$TESTPSQL -c "vacuum analyze"
else
echo "Skipping vacuum, it was recently ran by the last test"
fi
fi
# Run a reset script if one is provided. Typical use is cache clearing.
if [ -n "$TESTRESET" ]; then
$TESTRESET $SERVER_VERSION
fi
# Try to start the test close to the beginning of the checkpoint cycle.
if [ "$SKIPINIT" -ne "1" ]; then
# We want clean stats from pg_stat_bgwriter, but those won't show up
# until after the checkpoint is done. Wait a bit for the stats to update.
if [ "$SERVER_VERSION" -ge "170000" ]; then
REQCHECKER="SELECT num_requested FROM pg_stat_checkpointer;"
else
REQCHECKER="SELECT checkpoints_req FROM pg_stat_bgwriter;"
fi
STARTCHECK=`$TESTPSQL -At -c "$REQCHECKER"`
$TESTPSQL -c "checkpoint"
echo "Waiting for checkpoint statistics"
while true ; do
CHECK=`$TESTPSQL -At -c "$REQCHECKER"`
if [ "$CHECK" -gt "$STARTCHECK" ] ; then
break
fi
sleep 1
done
fi
# Create the tests record
DBSIZE=`$TESTPSQL -A -t -c "select pg_database_size(current_database())"`
$RESULTPSQL -q -c "insert into tests (server,server_version,script,clients,workers,set,scale,dbsize,rate_limit,client_limit,multi) values('$SERVERNAME','$VERSION_TEXT','$SCRIPT','$CLIENTS','$NUM_WORKERS','$SET','$SCALE','$DBSIZE',$RATE, $CLIENT_LIMIT,$MULTI)"
TEST=`$RESULTPSQL -A -t -c "select max(test) from tests WHERE server='${SERVERNAME}'"`
if [ "$?" -ne "0" ]; then
echo ERROR Can\'t read from tests table. Was the test data installed?
exit
fi
if [ -z "$TEST" ]; then
echo ERROR: Attempt to get a test number returned \"$TEST\", aborting
exit
fi
# Grab starting values for statistics
get_bgwriter
if [ "$SERVER_VERSION" -ge "100000" ]; then
START_WAL=`$TESTPSQL -A -t -F" " -c "select pg_current_wal_lsn()"`
else
START_WAL=`$TESTPSQL -A -t -F" " -c "select pg_current_xlog_location()"`
fi
$RESULTPSQL -c "insert into test_bgwriter(server,test,checkpoints_timed,checkpoints_req,buffers_checkpoint,buffers_clean,maxwritten_clean,buffers_backend,buffers_alloc,buffers_backend_fsync) values('$SERVERNAME','$TEST','$checkpoints_timed','$checkpoints_req','$buffers_checkpoint','$buffers_clean','$maxwritten_clean','$buffers_backend','$buffers_alloc','$buffers_backend_fsync')"
$TESTPSQL -c 'SELECT pg_stat_reset()' >> /dev/null 2>&1
$TESTPSQL -c 'SELECT pg_stat_statements_reset()' >> /dev/null 2>&1
# Tack the multiplier onto the rate option set
if [[ "$MULTI" -ge "0" ]] ; then
RATEOPT="$RATEOPT -Dmultiplier=$MULTI"
fi
# Decide between regular script and pass through.
# Most options are lost in pass-through mode to support the init workload.
# pgbench doesn't allow most of its command line parameters unless you're in
# "benchmark mode", which an init operation is not.
PREFIX="${SCRIPT:0:1}"
if [ "$PREFIX" == ":" ] ; then
SCRIPTOPT="${SCRIPT:1}"
PASSTHRU=1
else
SCRIPTOPT="-f $BASEDIR/$TESTDIR/$SCRIPT.sql $TESTLEN -n -l -c $CLIENTS $WORKERS $RATEOPT $LATOPT $PREPAREDOPT"
PASSTHRU=0
fi
# Setup test directory tree
echo This is test $TEST
mkdir -p results/$SERVERNAME/$TEST
# Start background daemon collectors
OS=`uname`
if [ "$OSDATA" -eq "1" ]; then
./timed-os-stats vmstat > results/$SERVERNAME/$TEST/vmstat.log &
VMSTAT=$!
./timed-os-stats iostat > results/$SERVERNAME/$TEST/iostat.log &
IOSTAT=$!
if [ "$OS" = "Linux" ] ; then
./timed-os-stats meminfo > results/$SERVERNAME/$TEST/meminfo.log &
MEMINFO=$!
fi
fi
# This one should run even if osdata collectors are not available
./timed-os-stats $TESTPSQL -At -f $BASEDIR/util/metric-query.sql > results/$SERVERNAME/$TEST/psql.log &
PSQLMET=$!
# Run the main pgbench test
cd results/$SERVERNAME/$TEST
echo Script $SCRIPT executing for $CLIENTS concurrent users... 1>&2
$PGBENCHBIN $SCRIPTOPT -s $SCALE $TESTHOSTPARAM -p $TESTPORT -U $TESTUSER $TESTDB > results.txt 2>&1 &
P=$!
wait $P
$RESULTPSQL -q -c "UPDATE tests SET end_time=now() WHERE server='${SERVERNAME}' AND test='${TEST}'"
$RESULTPSQL -q -c "UPDATE tests SET start_latency=start_time, end_latency=end_time WHERE server='${SERVERNAME}' AND tests.test='$TEST';"
../../../kill_pg "${PSQLMET}"
if [ "$OSDATA" -eq "1" ]; then
# The PIDs we have will be the process ID of the Python timed-os-stats
# process, but we must make sure to also kill the child processes (launched
# via subprocess.Popen()) so they are not lingering forever.
../../../kill_pg "${VMSTAT}"
../../../kill_pg "${IOSTAT}"
if [ "$OS" = "Linux" ] ; then
../../../kill_pg "${MEMINFO}"
# Find largest dirty memory value (which is in kB)
max_dirty=`grep Dirty meminfo.log | awk '{print $4}' | sort -n | tail -n 1`
fi
fi
# max_dirty has to be a valid integer value
if [ -z "$max_dirty" ] ; then
max_dirty=0
fi
# Update bgwriter data with delta
get_bgwriter
$RESULTPSQL -c "update test_bgwriter set \
checkpoints_timed=$checkpoints_timed - checkpoints_timed,\
checkpoints_req=$checkpoints_req - checkpoints_req,\
buffers_checkpoint=$buffers_checkpoint - buffers_checkpoint,\
buffers_clean=$buffers_clean - buffers_clean,\
maxwritten_clean=$maxwritten_clean - maxwritten_clean,\
buffers_backend=$buffers_backend - buffers_backend,\
buffers_alloc=$buffers_alloc - buffers_alloc,\
buffers_backend_fsync=$buffers_backend_fsync - buffers_backend_fsync, \
max_dirty=1024::bigint * '$max_dirty' \
WHERE server='${SERVERNAME}' AND test='$TEST'"
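# Example: if buffers_checkpoint was 1000 when the starting snapshot was
# inserted and get_bgwriter now reports 4500, the update above stores
# 4500 - 1000 = 3500, the number of buffers written by checkpoints during
# the test.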
# Cache server info in the test record to save CPU/memory upgrade history
SERVERCPU=`$RESULTPSQL -Atc "SELECT server_cpu FROM server WHERE server='${SERVERNAME}'"`
MEMGB=`$RESULTPSQL -Atc "SELECT server_mem_gb FROM server WHERE server='${SERVERNAME}'"`
if [ -z "${MEMGB}" ] ; then
MEMGB=0
echo WARNING: missing entry "${SERVERNAME}" in server table
fi
# Save database size at the end to support init case
DBSIZE=`$TESTPSQL -A -t -c "select pg_database_size(current_database())"`
$RESULTPSQL -c "update tests set dbsize=${DBSIZE},server_mem_gb=${MEMGB},server_cpu='${SERVERCPU}' WHERE server='${SERVERNAME}' AND test='${TEST}'"
# Stats and buffer cache data are spoiled very quickly by post-processing, so grab them as early as feasible
$TESTPSQL -c "\copy (SELECT '$TEST', current_timestamp, stats_reset, numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, '$SERVERNAME' FROM pg_stat_database WHERE datname=current_database()) TO 'dbstat.csv' WITH DELIMITER ',' CSV HEADER"
$RESULTPSQL -c "\copy test_stat_database FROM 'dbstat.csv' WITH DELIMITER ',' CSV HEADER"
$TESTPSQL -c "\copy (SELECT '$TEST', current_timestamp AS collected, schemaname AS nspname, relname AS tablename, indexrelname AS indexname, pg_relation_size(indexrelid) AS size, idx_blks_read AS rel_blks_read, idx_blks_hit AS rel_blks_hit, '$SERVERNAME' FROM pg_statio_user_indexes UNION SELECT '$TEST', current_timestamp AS collected, schemaname AS nspname, relname AS tablename, NULL AS indexname, pg_relation_size(relid) AS size, heap_blks_read AS rel_blks_read, heap_blks_hit AS rel_blks_hit, '$SERVERNAME' FROM pg_statio_user_tables) TO 'statio.csv' WITH DELIMITER ',' CSV HEADER"
$RESULTPSQL -c "\copy test_statio FROM 'statio.csv' WITH DELIMITER ',' CSV HEADER"
$TESTPSQL -c "\copy (SELECT '$SERVERNAME' AS server,'$TEST' AS test, N.nspname AS schemaname,c.relname,8192 * count(*) AS bytes, round(avg(usagecount)::numeric,2) AS avg_usage,max(usagecount) AS max_usage,isdirty FROM pg_class c LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) INNER JOIN pg_buffercache b ON b.relfilenode = c.relfilenode INNER JOIN pg_database d ON (b.reldatabase = d.oid AND d.datname = current_database()) WHERE NOT N.nspname IN ('pg_catalog','pg_toast','information_schema') GROUP BY N.nspname,c.relname,isdirty ) TO 'buffercache.csv' WITH DELIMITER ',' CSV HEADER"
$RESULTPSQL -c "\copy test_buffercache FROM 'buffercache.csv' WITH DELIMITER ',' CSV HEADER"
# Import test artifacts from results. So far this only gives useful output
# for the pgbench init case and osm2pgsql.
$BASEDIR/util/pgbench-init-parse results.txt "${SCRIPT}" > results.json
$RESULTPSQL -f $BASEDIR/util/art-import.sql
# Export less time sensitive database statistics from pgbench into results database
$TESTPSQL -c "\copy (SELECT '${SERVERNAME}', '$TEST',
name,
setting,
unit,
source,
boot_val,
current_setting(name) AS value,
CASE
WHEN unit='B' THEN setting::numeric
WHEN unit='8kB' THEN setting::numeric * 8192
WHEN unit='kB' THEN setting::numeric * 1024
WHEN unit='MB' THEN setting::numeric * 1024 * 1024
WHEN unit='s' THEN setting::numeric
WHEN unit='ms' THEN round(setting::numeric / 1000,3)
WHEN unit='min' THEN setting::numeric * 60
WHEN unit IS NULL AND vartype='integer' THEN setting::numeric
WHEN unit IS NULL AND vartype='real' THEN setting::numeric
END AS numeric_value,
CASE
WHEN unit IN ('B','8kB','kB','MB') THEN 'bytes'
WHEN unit IN ('s','ms','min') THEN 'seconds'
WHEN unit IS NULL AND vartype IN ('integer','real') THEN 'numeric'
END AS numeric_unit
FROM pg_settings
) TO 'pgset.csv' WITH DELIMITER ',' CSV HEADER"
$RESULTPSQL -c "\copy test_settings FROM 'pgset.csv' WITH DELIMITER ',' CSV HEADER"
if [ "$SERVER_VERSION" -ge "170000" ]; then
$TESTPSQL -c "\copy (SELECT '$SERVERNAME','$TEST',queryid,query,plans,total_plan_time,min_plan_time,max_plan_time,mean_plan_time,stddev_plan_time,calls,total_exec_time,min_exec_time,max_exec_time,mean_exec_time,stddev_exec_time,rows,shared_blks_hit,shared_blks_read,shared_blks_dirtied,shared_blks_written,local_blks_hit,local_blks_read,local_blks_dirtied,local_blks_written,temp_blks_read,temp_blks_written,shared_blk_read_time AS blk_read_time,shared_blk_write_time AS blk_write_time,wal_records,wal_fpi,wal_bytes FROM pg_stat_statements WHERE calls>1) TO 'statements.csv' WITH DELIMITER ',' CSV HEADER"
else
$TESTPSQL -c "\copy (SELECT '$SERVERNAME','$TEST',queryid,query,plans,total_plan_time,min_plan_time,max_plan_time,mean_plan_time,stddev_plan_time,calls,total_exec_time,min_exec_time,max_exec_time,mean_exec_time,stddev_exec_time,rows,shared_blks_hit,shared_blks_read,shared_blks_dirtied,shared_blks_written,local_blks_hit,local_blks_read,local_blks_dirtied,local_blks_written,temp_blks_read,temp_blks_written,blk_read_time,blk_write_time,wal_records,wal_fpi,wal_bytes FROM pg_stat_statements WHERE calls>1) TO 'statements.csv' WITH DELIMITER ',' CSV HEADER"
fi
$RESULTPSQL -c "\copy test_statements FROM 'statements.csv' WITH DELIMITER ',' CSV HEADER"
# WAL values are initially saved as text because they're pulled from the test
# database--but the functions to turn them into numeric types are available
# only in the results one.
if [ "$SERVER_VERSION" -gt "100000" ]; then
END_WAL=`$TESTPSQL -A -t -F" " -c "select pg_current_wal_lsn()"`
else
END_WAL=`$TESTPSQL -A -t -F" " -c "select pg_current_xlog_location()"`
fi
$RESULTPSQL -q -c "UPDATE tests SET wal_written=wal_lsn('$END_WAL') - wal_lsn('$START_WAL') WHERE server='${SERVERNAME}' AND test=$TEST"
tail -n 40 results.txt
# Try to do the table cleanup before spoiling the database and OS cache by
# touching the results heavily. That means before reading pgbench.log,
# which can be quite large on a fast server.
if [ "$SKIPINIT" -ne "1" ]; then
echo Cleaning up database $TESTDB
if [ "$PGBENCH_TABLES" -eq "1" ] ; then
$TESTPSQL -c "truncate table ${TABLEPREFIX}history"
fi
VACUUMSTART=`$RESULTPSQL -A -t -c "SELECT now()"`
$TESTPSQL -c "vacuum"
$TESTPSQL -c "checkpoint"
$RESULTPSQL -q -c "UPDATE tests SET cleanup=now() - '$VACUUMSTART'::timestamp WHERE server='${SERVERNAME}' AND test=$TEST"
fi
# Save pgbench log
# In the multi-threaded workers case, there may be a number of files, so copy
# them all in
cat pgbench_log.${P}* > pgbench.log
echo Worst latency results:
# TODO On Solaris, this may need to use /usr/xpg4/bin/tail instead
cut -f 3 -d " " pgbench.log | sort -n | tail -n 5
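# Example pgbench transaction log line (space-separated fields):
#   0 199 2241 0 1175850568 995598
# That's client_id, transaction_no, latency in microseconds (field 3,
# extracted above), script number, epoch seconds, and microseconds.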
tps=`egrep "(without initial connection time)|(including connections establishing)" results.txt | cut -d " " -f 3`
trans=`grep "number of transactions actually processed:" results.txt | cut -d":" -f 2 | cut -d "/" -f 1`
# Defaults for init and custom scripts that don't have transaction data.
if [ -z "$tps" ] ; then
tps=0
fi
if [ -z "$trans" ] ; then
trans=0
fi
$RESULTPSQL -q -c "UPDATE tests SET tps='$tps',trans='$trans', trans_latency='$trans' WHERE server='${SERVERNAME}' AND test=$TEST"
cp $BASEDIR/config config
cp $BASEDIR/params params
cp $BASEDIR/templates/test-index.html index.html
echo Importing and processing latency information
# Confirm we have a patched version of pgbench that has timestamps
TESTFORTS=`head -n 1 pgbench.log | cut -d" " -f 6`
if [ -z "$TESTFORTS" ]; then
if [ "$PGBENCH_TABLES" -eq "0" ] ; then
echo Skipping pgbench latency analysis since those tables aren\'t present.
elif [ "$PASSTHRU" -eq "1" ]; then
echo Database passthrough requests like initialization don\'t provide pgbench latency.
else
echo "ERROR: the pgbench used for this test is missing transaction"
echo timestamps. No latency information will be imported into
echo the database and some plots will be missing.
echo This may not be an error, some special features just don\'t create the right logs.
fi
else
# Import timestamp information
# The format of the csv file changes when the rate limit (-R) is used
if [ -n "$2" ]; then
$BASEDIR/log-to-csv_rates $TEST "${SERVERNAME}" < pgbench.log > timing.csv
$RESULTPSQL -c "copy timing from stdin with csv" < timing.csv
if [ "$TRIM" -gt 0 ] ; then
$RESULTPSQL -c "UPDATE tests SET start_latency=start_time + (end_time - start_time)/10, end_latency=end_time - (end_time - start_time)/10 WHERE server='${SERVERNAME}' AND tests.test='$TEST';"
$RESULTPSQL -c "WITH trimmed AS (SELECT start_time + (end_time - start_time)/10 as trim_start, end_time - (end_time - start_time)/10 as trim_end FROM tests WHERE server='${SERVERNAME}' AND test='$TEST') DELETE FROM timing WHERE server='${SERVERNAME}' AND test='$TEST' AND (ts<=(SELECT trim_start FROM trimmed) OR ts>=(SELECT trim_end FROM trimmed));"
$RESULTPSQL -c "UPDATE tests SET trans_latency=(SELECT count(*) FROM timing WHERE server='${SERVERNAME}' AND tests.test='$TEST') WHERE server='${SERVERNAME}' AND tests.test='$TEST';"
fi
$RESULTPSQL -q -c "UPDATE tests set avg_latency=(select avg(latency) from timing where tests.test=timing.test), max_latency=(select max(latency)from timing where tests.test=timing.test), percentile_90_latency=(select latency from timing ti where tests.test=ti.test and latency IS NOT NULL ORDER BY latency OFFSET GREATEST(0,(SELECT trunc(0.90*count(*)) FROM timing WHERE latency IS NOT null) LIMIT 1)) WHERE server='${SERVERNAME}' AND tests.test='$TEST'"
else
$BASEDIR/log-to-csv $TEST "${SERVERNAME}" < pgbench.log > timing.csv
$RESULTPSQL -c "copy timing from stdin with csv" < timing.csv
if [ "$TRIM" -gt 0 ] ; then
$RESULTPSQL -c "UPDATE tests SET start_latency=start_time + (end_time - start_time)/10, end_latency=end_time - (end_time - start_time)/10 WHERE server='${SERVERNAME}' AND tests.test='$TEST';"
$RESULTPSQL -c "WITH trimmed AS (SELECT start_time + (end_time - start_time)/10 as trim_start, end_time - (end_time - start_time)/10 as trim_end FROM tests WHERE server='${SERVERNAME}' AND test='$TEST') DELETE FROM timing WHERE server='${SERVERNAME}' AND test='$TEST' AND (ts<=(SELECT trim_start FROM trimmed) OR ts>=(SELECT trim_end FROM trimmed));"
$RESULTPSQL -c "UPDATE tests SET trans_latency=(SELECT count(*) FROM timing WHERE server='${SERVERNAME}' AND tests.test='$TEST') WHERE server='${SERVERNAME}' AND tests.test='$TEST';"
fi
fi
$RESULTPSQL -q -c "UPDATE tests SET avg_latency=(select avg(latency) from timing where tests.test=timing.test), max_latency=(select max(latency)from timing where tests.test=timing.test), percentile_90_latency=(select latency from timing where tests.test=timing.test ORDER BY latency OFFSET GREATEST(0,(trunc(0.90*trans_latency))) LIMIT 1) WHERE server='${SERVERNAME}' AND tests.test='$TEST'"
fi
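# Example: with 1000 timing rows, OFFSET GREATEST(0,trunc(0.90*1000)) skips
# the 900 fastest transactions, so the value stored as the 90th percentile
# is the 901st-smallest latency.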
if [ "$OS" = "Linux" ] ; then
$BASEDIR/dirty-plot < meminfo.log > dirtydata.txt
fi
# Save a high-level summary of latency broken into equal-sized buckets of transactions
# 20 5% buckets
$RESULTPSQL -c "DROP TABLE buckets" >> /dev/null 2>&1
$RESULTPSQL -c "CREATE TABLE buckets AS SELECT trans,0.05 as bucket_size,0.0 as bucket_left,0.05*generate_series(1,20) AS bucket_right,0.0 as offset_left,0.0 as offset_right,0.0 as latency_left,0.0 as latency_right FROM tests WHERE server='${SERVERNAME}' AND test=$TEST"
$RESULTPSQL -c "UPDATE buckets SET bucket_left=bucket_right - bucket_size;"
$RESULTPSQL -c "UPDATE buckets SET offset_left=round(trans * bucket_left), offset_right=round(trans * bucket_right) - 1;"
$RESULTPSQL -c "UPDATE buckets SET latency_left=(SELECT latency FROM timing WHERE test=$TEST ORDER BY test,latency OFFSET GREATEST(0,buckets.offset_left) LIMIT 1), latency_right=(SELECT latency FROM timing WHERE test=$TEST ORDER BY test,latency OFFSET GREATEST(0,buckets.offset_right) LIMIT 1)"
$RESULTPSQL -c "ALTER TABLE buckets DROP COLUMN trans"
$RESULTPSQL -c "ALTER TABLE buckets DROP COLUMN bucket_size"
# DEBUG $RESULTPSQL -c "SELECT * FROM buckets ORDER BY bucket_left"
$RESULTPSQL -c "\copy (SELECT * FROM buckets) TO 'latency-20.csv' WITH DELIMITER ',' CSV HEADER"
# 100 1% buckets
if [ "$LARGEBUCKETS" -eq "1" ] ; then
$RESULTPSQL -c "DROP TABLE buckets"
$RESULTPSQL -c "CREATE TABLE buckets AS SELECT trans,0.01 as bucket_size,0.0 as bucket_left,0.01*generate_series(1,100) AS bucket_right,0.0 as offset_left,0.0 as offset_right,0.0 as latency_left,0.0 as latency_right FROM tests WHERE tests.test=$TEST"
$RESULTPSQL -c "UPDATE buckets SET bucket_left=bucket_right - bucket_size;"
$RESULTPSQL -c "UPDATE buckets SET offset_left=round(trans * bucket_left), offset_right=round(trans * bucket_right) - 1";
$RESULTPSQL -c "UPDATE buckets SET latency_left=(SELECT latency from timing WHERE test=$TEST ORDER BY test,latency OFFSET GREATEST(0,buckets.offset_left) LIMIT 1), latency_right=(SELECT latency FROM timing WHERE test=$TEST ORDER BY test,latency OFFSET GREATEST(0,buckets.offset_right) LIMIT 1)"
$RESULTPSQL -c "ALTER TABLE buckets DROP COLUMN trans"
$RESULTPSQL -c "ALTER TABLE buckets DROP COLUMN bucket_size"
$RESULTPSQL -c "\copy (SELECT * FROM buckets) TO 'latency-100.csv' WITH DELIMITER ',' CSV HEADER"
$RESULTPSQL -c "DROP TABLE buckets"
fi
# Save second resolution transaction summary and metrics summaries
$RESULTPSQL -c "\copy (SELECT extract(epoch FROM date_trunc('second',ts)) AS collected, count(*) AS samples, min(latency) AS min_latency, round(1000*avg(latency))/1000 AS avg_latency, max(latency) AS max_latency FROM timing WHERE server='${SERVERNAME}' AND test=$TEST GROUP BY date_trunc('second',ts) ORDER BY date_trunc('second',ts)) TO 'latency_1s.csv' WITH DELIMITER ',' CSV HEADER"
if [ -z "$BRIDGE_BEARER" ] ; then
$BASEDIR/metrics2csv $TEST "${SERVERNAME}"
else
$BASEDIR/util/cbmetrics-import $TEST "${SERVERNAME}"
fi
# Plot result graphs
if [ -n "$GNUPLOT" ] && [ -x "$GNUPLOT" ]; then
$RESULTPSQL -A -t -F' ' -c "select extract(epoch from date_trunc('second',ts)),count(*) from timing where server='${SERVERNAME}' AND test=$TEST group by date_trunc('second',ts) order by date_trunc('second',ts)" > tpsdata.txt
$GNUPLOT $BASEDIR/plots/tps.plot
$RESULTPSQL -A -t -F' ' -c "select extract(epoch from ts),latency from timing WHERE server='${SERVERNAME}' AND test=$TEST" > latency.txt
$GNUPLOT $BASEDIR/plots/latency.plot
if [ -f dirtydata.txt ] ; then
$GNUPLOT $BASEDIR/plots/dirty.plot
fi
# TODO If $GNUPLOT is set to something other than its default value from `which`,
# there should be a command line option in csv2gnuplot that informs that program
# of the path.
$BASEDIR/csv2gnuplot -i iostat.log -d iostat -t "pgbench iostat" --disks="$DISKLIST" -o iostat.png
$BASEDIR/csv2gnuplot -i vmstat.log -d vmstat -t "pgbench vmstat" -o vmstat.png
else
echo "gnuplot not found, skipping benchwarmer charts generation"
fi
# Now that we're done plotting and computing stats, wipe the low-level
# data we don't need anymore
$RESULTPSQL -q -c "truncate table timing"
# Save some configuration information about the server
CLIENTHOST=`hostname`
SERVERHOST="$TESTHOST"
if [ "$SERVERHOST" = "localhost" ]; then
SERVERHOST="$CLIENTHOST"
fi
SETTINGS="pg_settings.txt"
# Write out system and PostgreSQL installation details
echo Test results: > $SETTINGS
$RESULTPSQL -c "select script,clients,round(tps) as tps,1000*round(avg_latency)/1000 as avg_latency,1000*round(max_latency)/1000 as max_latency from tests where server='${SERVERNAME}' AND test=$TEST" | grep -v " row)" >> $SETTINGS
echo Server $SERVERHOST, client $CLIENTHOST >> $SETTINGS
echo >> $SETTINGS
echo "Server info and settings in postgresql.conf:" >> $SETTINGS
$TESTPSQL -At -c "select version();" | grep -v " row" >> $SETTINGS
$TESTPSQL -c "show data_directory" | grep -v " row" >> $SETTINGS
$TESTPSQL -c "select name,current_setting(name) from pg_settings where source='configuration file' and not name in ('DateStyle','lc_messages','lc_monetary','lc_numeric','lc_time','listen_addresses','log_directory','log_rotation_age','log_rotation_size','log_truncate_on_rotation');" | grep -v " rows)" >> $SETTINGS
# Operating system information
echo >> $SETTINGS
echo "benchmark client OS Configuration (may not be the same as the server)" >> $SETTINGS
uname -a >> $SETTINGS
# Linux OS release is likely to be in one or more of these files
for f in `ls /etc/lsb-release /etc/debian_version /etc/redhat-release 2>/dev/null` ; do
echo $f: >> $SETTINGS
cat $f >> $SETTINGS
echo >> $SETTINGS
done
# Save version info on Mac OS X
if [ "$OS" = "Darwin" ] ; then
sw_vers >> $SETTINGS
fi
echo >> $SETTINGS
if [ -d /proc/sys/vm/ ] ; then
for f in `ls /proc/sys/vm/dirty_*` ; do
S=`cat $f`
echo $f=$S >> $SETTINGS
done
echo >> $SETTINGS
fi
mount >> $SETTINGS
# Remove temporary files; don't complain if any are missing, e.g. because
# we skipped gnuplot and never generated them
rm -f pgbench_log.${P}*
rm -f pgbench.log
rm -f timing.csv # DEBUG
rm -f tpsdata.txt
rm -f latency.txt
rm -f dirtydata.txt
rm -f dbstat.csv statio.csv
rm -f pgset.csv
rm -f statements.csv