zutrust.py
#!/usr/bin/env python
"""
zutrust
Called by AWS Lambda to run Trusted Advisor checks
and track changes since the last run.
Copyright 2018 zulily, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import json
import logging
from datetime import datetime, timedelta
from multiprocessing import Process, Pipe
from time import sleep
import zlib
import boto3
from botocore import exceptions
from dateutil.parser import parse
import pytz
LOGGER = logging.getLogger()
LOGGER.setLevel(logging.INFO)
SNS_C = boto3.client('sns')
S3_C = boto3.client('s3')
TA_C = boto3.client('support')
LANG = 'en'
UTC = pytz.utc
LOCALTZ = pytz.timezone('America/Los_Angeles')
DEFS_PATH = 'trustdefs/'
ACCT_FILEPATH = DEFS_PATH + 'trustdefs.json'
MAX_SNS_MESSAGE = 1024 * 256
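# Illustrative sketch of the expected configuration shapes (not part of the
# original source; inferred from how the keys are read below). The account
# definitions file at ACCT_FILEPATH might look roughly like:
#
#   {"bucket": "example-zutrust-bucket",
#    "S3_suffix": "prod",
#    "send_report": true,
#    "report_ARN": "arn:aws:sns:us-west-2:123456789012:example-report-topic",
#    "TA_link": "https://console.aws.amazon.com/trustedadvisor/"}
#
# and the per-account checks file ("checks-<S3_suffix>.json" in that bucket)
# might look like:
#
#   {"checks": [{"check_id": "<trusted-advisor-check-id>",
#                "freq_mins": 1440,
#                "warning": "arn:aws:sns:us-west-2:123456789012:example-warn-topic",
#                "error": "arn:aws:sns:us-west-2:123456789012:example-error-topic"}]}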
class TrustCheck(object):
"""
Class for each AWS Trusted Advisor check, which could be a
separate process, if desired.
"""
def __init__(self, acct_info, chk_info, cur_dt, chk_defs):
"""
Initialize the connector to S3.
"""
self.s3 = boto3.client('s3')
self.support = boto3.client('support')
self.check_id = chk_info['check_id']
self.freq = chk_info['freq_mins']
self.warn = chk_info['warning']
self.error = chk_info['error']
self.bucket = acct_info['bucket']
self.cur_dt = cur_dt
self.chk_defs = chk_defs
self.check_file = self.check_id + '-' + acct_info['S3_suffix'] + '.json'
self.last_dt = self._get_last_check_time()
    def _save_check_history(self, check_dict, filename):
        """
        Save check history to S3 as JSON
        """
        try:
            out = self.s3.put_object(Bucket=self.bucket, Key=filename,
                                     Body=json.dumps(check_dict,
                                                     ensure_ascii=False,
                                                     default=dateconverter))
        except exceptions.ClientError as err:
            LOGGER.error('Issue writing file: %s: %s', filename, str(err))
            return None
        return out['ResponseMetadata']['HTTPStatusCode']
    def determine_deltas(self, regs, last_regs):
        """
        Create lists of new regressions and fixed issues since the previous
        run of checks, handling the case where no previous results exist.
        """
        # ret1: issues fixed since the last run; ret2: new regressions
        ret1 = {}
        try:
            idict = {a['resourceId']: a for a in regs['flaggedResources']}
        except KeyError:
            # no issues found
            idict = {}
        ret2 = idict.values()
        if last_regs:
            try:
                ldict = {a['resourceId']: a for a in last_regs['flaggedResources']}
            except KeyError:
                # no issues found
                ldict = {}
            set_regs = set(idict.keys())
            set_last_regs = set(ldict.keys())
            newregkeys = list(set_regs - set_last_regs)
            delregkeys = list(set_last_regs - set_regs)
            ret2 = [idict[a] for a in newregkeys]
            ret1 = [ldict[a] for a in delregkeys]
        return ret1, ret2
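    # Hypothetical example of the delta logic above: if the previous run
    # flagged resources {"vol-1", "vol-2"} and the current run flags
    # {"vol-2", "vol-3"}, determine_deltas() returns the pair
    # ([record for "vol-1"], [record for "vol-3"]), i.e.
    # (issues fixed since the last run, new regressions).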
    def _get_last_check_time(self):
        """
        Retrieve last Trust Advisor check time
        """
        retval = None
        resp = self.support.describe_trusted_advisor_check_summaries(checkIds=[self.check_id])
        if resp:
            try:
                timestr = resp['summaries'][0]['timestamp']
                retval = parse(timestr)
            except (KeyError, ValueError):
                LOGGER.info('Received invalid describe check: %s', str(resp))
        else:
            LOGGER.error('No response from describe check')
        return retval
    def _refresh_check(self):
        """
        Resubmit the Trust Advisor check
        """
        retval = ''
        resp = self.support.refresh_trusted_advisor_check(checkId=self.check_id)
        if resp:
            try:
                retval = resp['status']['status']
            except (KeyError, ValueError):
                LOGGER.error('Received invalid refresh check: %s', str(resp))
        else:
            LOGGER.error('No response from refresh check')
        return retval
    def _get_check_results(self):
        """
        Retrieve Trust Advisor results
        """
        retval = {}
        resp = self.support.describe_trusted_advisor_check_result(checkId=self.check_id,
                                                                   language=LANG)
        if resp:
            try:
                retval = resp['result']
            except (KeyError, ValueError):
                LOGGER.error('Received invalid check results: %s', str(resp))
        else:
            LOGGER.error('No response from check result')
        return retval
    def _format_reg(self, reg):
        """
        Given a regression check result, determine if it's a true regression,
        and if so, format it
        """
        valid = True
        if str(reg['status']) == 'ok':
            # ignore results that are not issues
            valid = False
        reg_str = self.chk_defs['owner'] + ': '
        if 'metadata' in reg:
            for field in reg['metadata']:
                if isinstance(field, basestring):
                    reg_str += ' ' + str(field)
        return valid, reg_str
    def _process_regs(self, cur_run, new_regs, del_regs):
        """
        Given zutrust run results, format and handle regression alerts
        """
        new_needs, del_needs = False, False
        body = ''
        if new_regs:
            tmpreg = ''
            for reg in new_regs:
                valid, reg_str = self._format_reg(reg)
                if valid:
                    new_needs = True
                    # format alert
                    alert = 'Regression Alert: ' + self.chk_defs[self.check_id]['name']
                    alert += ' at ' + self.cur_dt.astimezone(LOCALTZ).strftime('%c') + '\n'
                    alert += reg_str
                    self._send_alert(alert, reg['status'])
                    tmpreg += reg_str + '<br>'
        if new_needs:
            body += '<h4>New Regressions:</h4>' + tmpreg
        else:
            body += '<h4>No new regressions.</h4>'
        if del_regs:
            tmpreg = ''
            for reg in del_regs:
                valid, reg_str = self._format_reg(reg)
                if valid:
                    del_needs = True
                    tmpreg += reg_str + '<br>'
        if del_needs:
            body += '<h4>Regressions removed:</h4>' + tmpreg
        else:
            body += '<h4>No removed regressions.</h4>'
        if new_needs or del_needs:
            output = '<h3>Regression Check: ' + self.chk_defs[self.check_id]['name'] + '</h3>'
            output += 'Account: ' + self.chk_defs['owner']
            output += '<br>Time: ' + self.cur_dt.astimezone(LOCALTZ).strftime('%c')
            output += '<br>Total resources checked: '
            output += str(cur_run['resourcesSummary']['resourcesProcessed'])
            output += body
            return output
        return None
    def _send_alert(self, alert_text, sev):
        """
        Send alert to AWS SNS endpoint
        Note: SNS takes a max of 256KB.
        """
        subj = str("Regression: " + self.cur_dt.astimezone(LOCALTZ).strftime('%c'))
        if sev == 'error':
            arn = self.error
        else:
            arn = self.warn
        overage = len(alert_text) - MAX_SNS_MESSAGE
        if overage > 0:
            # trim enough to stay within the limit once the 21-character
            # truncation marker is appended
            alert_text = alert_text[:-overage - 21] + '\n<message truncated/>'
        SNS_C.publish(TopicArn=arn, Message=alert_text, Subject=subj)
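    # Worked example of the truncation above (assumes the 21-character marker
    # "\n<message truncated/>"): with MAX_SNS_MESSAGE = 262,144, a
    # 262,200-character alert has an overage of 56, is cut back to
    # 262,144 - 21 = 262,123 characters, and the appended marker brings it
    # back to exactly the SNS limit.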
    def _check_run_time(self, chk):
        """
        Given last check, determine if a new one should be run
        """
        # Handle un-refreshable checks
        if self.freq == 0:
            result = False
        else:
            result = True
        if chk:
            try:
                old_dtstr = chk['timestamp']
                old_dt = parse(old_dtstr)
                if self.cur_dt - old_dt < timedelta(minutes=self.freq):
                    result = False
            except (KeyError, ValueError):
                LOGGER.info('No time found in last check.')
        return result
    def run(self, conn):
        """
        Retrieve old check, run new check, save new check, compare diffs,
        process regressions, format report, and send it back to parent.
        """
        check = {}
        prev_results = load_s3_file(self.s3, self.bucket, self.check_file)
        if self._check_run_time(prev_results):
            self._refresh_check()
            runtime = None
            # poll for the refreshed check to complete, backing off between tries
            for i in range(0, 5):
                sleep(i*10)
                runtime = self._get_last_check_time()
                if runtime >= self.last_dt:
                    break
        chkresults = self._get_check_results()
        response = self._save_check_history(chkresults, self.check_file)
        del_regs, new_regs = self.determine_deltas(chkresults, prev_results)
        check_text = self._process_regs(chkresults, new_regs, del_regs)
        if check_text:
            check['zipped'] = zlib.compress(check_text)
            check['check_id'] = self.check_id
        conn.send(check)
        conn.close()
# END TrustCheck class
def dateconverter(date_obj):
    """
    Stringify datetime.datetime in a given instance
    """
    if isinstance(date_obj, datetime):
        return str(date_obj)
def load_defs_file(file_name):
    """
    Load JSON definitions file
    """
    try:
        with open(file_name, 'r') as monfile:
            mydict = json.load(monfile)
    except (ValueError, IOError) as error:
        mydict = ""
        LOGGER.warning('Failed to load: %s', file_name)
        LOGGER.critical('Critical Error: %s', str(error))
    return mydict
def load_s3_file(s3_client, bucket, filename):
    """
    Load JSON S3 file, either checks or history
    """
    hist = None
    try:
        obj = s3_client.get_object(Bucket=bucket, Key=filename)
        last_str = obj['Body'].read().decode('utf-8')
        hist = json.loads(last_str)
    except exceptions.ClientError as err:
        if err.response['Error']['Code'] == "NoSuchKey":
            LOGGER.warning('No file found: %s', filename)
    except ValueError:
        pass
    return hist
def load_check_definitions(lang):
    """
    Retrieve Trust Advisor check definitions
    """
    retval = {}
    resp = TA_C.describe_trusted_advisor_checks(language=lang)
    if resp:
        try:
            checks = resp['checks']
            retval = {a['id']: a for a in checks}
        except (KeyError, ValueError):
            LOGGER.error('Received invalid check definitions: %s', str(resp))
    else:
        LOGGER.error('No response from check definitions')
    return retval
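# Note: the Support API response for describe_trusted_advisor_checks carries a
# "checks" list whose entries include "id", "name", "description", "category"
# and "metadata"; this module indexes them by "id" and later reads "name" and
# "description" when building the report.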
def add_owner_name(chk_defs):
    """
    Add Owner's DisplayName to Trust Advisor check definitions
    """
    resp = S3_C.list_buckets()
    if resp:
        try:
            owner = resp['Owner']['DisplayName']
            chk_defs['owner'] = owner
        except (KeyError, ValueError):
            LOGGER.error('Received invalid bucket list: %s', str(resp))
    else:
        LOGGER.error('No response from S3 List Buckets')
    return chk_defs
def expand_check_result(body, chk_defs):
    """
    Uncompress check result, adding definition
    """
    output = zlib.decompress(body['zipped'])
    output += '<details><summary>Definition (click to toggle):</summary><br>'
    output += chk_defs[body['check_id']]['description']
    output += '</details><hr>'
    return output
def send_report(report_text, report_arn, now_dt):
    """
    Publish report to AWS SNS endpoint
    Note: publish takes a max of 256KB.
    """
    subj = str("Regression Report for " + now_dt.astimezone(LOCALTZ).strftime('%c'))
    overage = len(report_text) - MAX_SNS_MESSAGE
    if overage > 0:
        # trim enough to stay within the limit once the 21-character
        # truncation marker is appended
        report_text = report_text[:-overage - 21] + '\n<message truncated/>'
    resp = SNS_C.publish(TopicArn=report_arn, Message=report_text,
                         Subject=subj)
    return resp
def main(event, context):
    """
    Main functionality
    """
    now_dt = datetime.now(pytz.utc)
    checks = []
    parent_connects = []
    report = ''
    retval = 1
    ##### PROGRAM FLOW #####
    # Load account/team definitions file
    acct_info = load_defs_file(ACCT_FILEPATH)
    checkfile = 'checks-' + acct_info['S3_suffix'] + '.json'
    checkdata = load_s3_file(S3_C, acct_info['bucket'], checkfile)
    # For each check, run the check, get the regressions
    checkdefs = load_check_definitions(lang=LANG)
    checkdefs = add_owner_name(checkdefs)
    for chk in checkdata['checks']:
        parent_conn, child_conn = Pipe()
        parent_connects.append(parent_conn)
        chkclass = TrustCheck(acct_info, chk, now_dt, checkdefs)
        process = Process(target=chkclass.run, args=(child_conn,))
        checks.append(process)
        process.start()
    for process in checks:
        process.join()
    # Receive report from each check (process)
    for pconn in parent_connects:
        resp = pconn.recv()
        if 'zipped' in resp and isinstance(resp['zipped'], basestring):
            report += expand_check_result(resp, checkdefs)
    if acct_info['send_report'] and report:
        report += 'Trusted Advisor Dashboard:\n' + acct_info['TA_link']
        resp = send_report(report, acct_info['report_ARN'], now_dt)
        try:
            if resp['ResponseMetadata']['HTTPStatusCode'] == 200:
                retval = 0
        except (TypeError, ValueError):
            pass
    return retval
#main('foo', 'bar')
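# Deployment/testing sketch (assumption, not part of the original source): when
# packaged for Lambda the handler would be "zutrust.main", typically triggered
# on a schedule; the event and context arguments are unused. For a local smoke
# test with suitable AWS credentials (support, s3, and sns:Publish permissions)
# and the definitions files described near the top of this module in place,
# something like the following should work:
#
#   if __name__ == '__main__':
#       print(main({}, None))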