-
Notifications
You must be signed in to change notification settings - Fork 813
/
mesos.py
113 lines (94 loc) · 4.25 KB
/
mesos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import time
import requests
from checks import AgentCheck
from util import json, headers
from hashlib import md5
import urllib2
class Mesos(AgentCheck):
    """Agent check that reports Mesos master data as gauges.

    For every configured instance the check polls three JSON endpoints
    under ``instance['url']`` -- ``/master/roles.json``,
    ``/master/stats.json`` and ``/master/state.json`` -- and submits the
    values found there through ``self.gauge``.  A request that times out or
    answers with a status other than 200 produces an event through
    ``self.event`` and that endpoint is skipped for the current run.
    """

    # Task/slave counters read directly from the top level of state.json.
    STATE_COUNTERS = (
        'deactivated_slaves',
        'failed_tasks',
        'finished_tasks',
        'killed_tasks',
        'lost_tasks',
        'staged_tasks',
        'started_tasks',
    )

    def check(self, instance):
        """Run the check against a single instance configuration.

        ``instance`` must provide ``url``; instances without it are logged
        and skipped.  ``timeout`` (seconds) is optional and falls back to
        ``default_timeout`` from ``init_config``, which itself defaults to 5.

        Raises ``KeyError`` if a successfully fetched payload lacks one of
        the keys this check reads unconditionally.
        """
        if 'url' not in instance:
            self.log.info("Skipping instance, no url found.")
            return

        # Load values from the instance config
        url = instance['url']
        default_timeout = self.init_config.get('default_timeout', 5)
        timeout = float(instance.get('timeout', default_timeout))

        # Each endpoint is independent: a failed fetch (None) only skips
        # the metrics derived from that endpoint.
        roles = self.master_roles(url, timeout)
        if roles is not None:
            self._report_roles(roles)

        stats = self.master_stats(url, timeout)
        if stats is not None:
            self._report_stats(stats)

        state = self.master_state(url, timeout)
        if state is not None:
            self._report_state(state)

    def _report_roles(self, response):
        """Submit per-role gauges from a parsed roles.json payload."""
        for role in response['roles']:
            tags = ['mesos', 'role:' + role['name']]
            self.gauge('mesos.role.frameworks', len(role['frameworks']), tags=tags)
            self.gauge('mesos.role.weight', role['weight'], tags=tags)
            self._report_resources('mesos.role.', role['resources'],
                                   ('cpus', 'mem'), tags)

    def _report_stats(self, response):
        """Submit one gauge per top-level entry of a parsed stats.json payload."""
        for key, value in response.items():
            self.gauge('mesos.stats.' + key, value, tags=['mesos'])

    def _report_state(self, response):
        """Submit cluster, per-framework and per-slave gauges from state.json."""
        for attr in self.STATE_COUNTERS:
            self.gauge('mesos.state.' + attr, response[attr], tags=['mesos'])

        for framework in response['frameworks']:
            tags = ['mesos', 'framework:' + framework['id']]
            self._report_resources('mesos.state.framework.',
                                   framework['resources'], ('cpus', 'mem'), tags)

        for slave in response['slaves']:
            tags = ['mesos', 'slave:' + slave['id']]
            self._report_resources('mesos.state.slave.', slave['resources'],
                                   ('cpus', 'mem', 'disk'), tags)

    def _report_resources(self, prefix, resources, attrs, tags):
        """Submit ``prefix + attr`` for each of ``attrs`` present in ``resources``."""
        for attr in attrs:
            if attr in resources:
                self.gauge(prefix + attr, resources[attr], tags=tags)

    def master_roles(self, url, timeout):
        """Fetch ``<url>/master/roles.json``; see ``get_json`` for the result."""
        return self.get_json(url + "/master/roles.json", timeout)

    def master_stats(self, url, timeout):
        """Fetch ``<url>/master/stats.json``; see ``get_json`` for the result."""
        return self.get_json(url + "/master/stats.json", timeout)

    def master_state(self, url, timeout):
        """Fetch ``<url>/master/state.json``; see ``get_json`` for the result."""
        return self.get_json(url + "/master/state.json", timeout)

    def get_json(self, url, timeout):
        """GET ``url`` and return its decoded JSON body.

        Returns ``None`` after emitting an event when the request times out
        or the response status is not 200.  Other ``requests`` errors and
        JSON decoding errors are not handled here and propagate.
        """
        # Use a hash of the URL as an aggregation key.  md5 needs bytes, so
        # text URLs are encoded first; byte-string URLs are hashed as-is.
        url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
        aggregation_key = md5(url_bytes).hexdigest()

        try:
            r = requests.get(url, timeout=timeout)
        except requests.exceptions.Timeout:
            # If there's a timeout
            self.timeout_event(url, timeout, aggregation_key)
            return None

        # The status must be checked before parsing: an error page is not
        # the payload the callers expect.
        if r.status_code != 200:
            self.status_code_event(url, r, aggregation_key)
            return None

        return r.json()

    def timeout_event(self, url, timeout, aggregation_key):
        """Emit an ``http_check`` event stating that ``url`` timed out."""
        self.event({
            'timestamp': int(time.time()),
            'event_type': 'http_check',
            'msg_title': 'URL timeout',
            'msg_text': '%s timed out after %s seconds.' % (url, timeout),
            'aggregation_key': aggregation_key
        })

    def status_code_event(self, url, r, aggregation_key):
        """Emit an ``http_check`` event carrying the status code of ``r``."""
        self.event({
            'timestamp': int(time.time()),
            'event_type': 'http_check',
            'msg_title': 'Invalid reponse code for %s' % url,
            'msg_text': '%s returned a status of %s' % (url, r.status_code),
            'aggregation_key': aggregation_key
        })
if __name__ == '__main__':
    # Manual run: load the check from the agent's YAML config, execute it
    # once per configured instance and dump what it collected.
    # NOTE(review): the nesting below was reconstructed from unindented
    # source; the metrics dump is assumed to run once per instance -- confirm.
    check, instances = Mesos.from_yaml('/etc/dd-agent/conf.d/mesos.yaml')
    for instance in instances:
        print("\nRunning the check against url: %s" % (instance['url']))
        check.check(instance)

        if check.has_events():
            print('Events: %s' % (check.get_events()))

        print('Metrics:\n')
        for i, metric in enumerate(check.get_metrics()):
            print(" %d: %s" % (i, metric))