forked from benhoyt/goawk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
benchmark_awks.py
executable file
·122 lines (107 loc) · 3.41 KB
/
benchmark_awks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python3
# Benchmark GoAWK against other AWK versions
from __future__ import print_function
import glob
import os.path
import shutil
import subprocess
import sys
import time
AWKS = [
'./goawk',
'./orig', # GoAWK without perf improvements (commit 8ab5446)
'original-awk',
'gawk',
'mawk',
]
NORM_INDEX = AWKS.index('original-awk')
# Only get the mean of these tests because these are the only ones
# we show in the GoAWK article.
TESTS_TO_MEAN = [
'tt.01',
'tt.02',
'tt.02a',
'tt.03',
'tt.03a',
'tt.04',
'tt.05',
'tt.06',
'tt.07',
'tt.big',
'tt.x1',
'tt.x2',
]
MEAN_TESTS = []
NUM_RUNS = 6
MIN_TIME = 0.5
PROGRAM_GLOB = 'testdata/tt.*'
if len(sys.argv) > 1:
PROGRAM_GLOB = 'testdata/' + sys.argv[1]
def repeat_file(input_file, repeated_file, n):
with open(input_file, 'rb') as fin, open(repeated_file, 'wb') as fout:
for i in range(n):
fin.seek(0)
shutil.copyfileobj(fin, fout)
print('Test ', end='')
for awk in AWKS:
display_awk = os.path.basename(awk)
display_awk = display_awk.replace('original-awk', 'awk')
print('| {:>5} '.format(display_awk), end='')
print()
print('-'*9 + ' | -----'*len(AWKS))
repeats_created = []
products = [1] * len(AWKS)
num_products = 0
programs = sorted(glob.glob(PROGRAM_GLOB))
for program in programs:
# First do a test run with GoAWK to see roughly how long it takes
cmdline = '{} -f {} testdata/foo.td >tt.out'.format(AWKS[0], program)
start = time.time()
status = subprocess.call(cmdline, shell=True)
elapsed = time.time() - start
# If test run took less than MIN_TIME seconds, scale/repeat input
# file accordingly
input_file = 'testdata/foo.td'
if elapsed < MIN_TIME:
multiplier = int(round(MIN_TIME / elapsed))
repeated_file = '{}.{}'.format(input_file, multiplier)
if not os.path.exists(repeated_file):
repeat_file(input_file, repeated_file, multiplier)
repeats_created.append(repeated_file)
input_file = repeated_file
# Record time taken to run this test, running each NUM_RUMS times
# and taking the minimum elapsed time
awk_times = []
for awk in AWKS:
cmdline = '{} -f {} {} >tt.out'.format(awk, program, input_file)
times = []
for i in range(NUM_RUNS):
start = time.time()
status = subprocess.call(cmdline, shell=True)
elapsed = time.time() - start
times.append(elapsed)
if status != 0:
print('ERROR status {} from cmd: {}'.format(status, cmdline), file=sys.stderr)
min_time = min(sorted(times)[1:])
awk_times.append(min_time)
# Normalize to One True AWK time = 1.0
norm_time = awk_times[NORM_INDEX]
speeds = [norm_time/t for t in awk_times]
test_name = program.split('/')[1]
if test_name in TESTS_TO_MEAN:
num_products += 1
for i in range(len(AWKS)):
products[i] *= speeds[i]
print('{:9}'.format(test_name), end='')
for i, awk in enumerate(AWKS):
print(' | {:5.2f}'.format(speeds[i]), end='')
print()
print('-'*9 + ' | -----'*len(AWKS))
print('**Geo mean** ', end='')
for i, awk in enumerate(AWKS):
print(' | **{:.2f}**'.format(products[i] ** (1.0/num_products)), end='')
print()
# Delete temporary files created
os.remove('tt.out')
for repeated_file in repeats_created:
os.remove(repeated_file)