Skip to content

Commit

Permalink
Merge pull request #397 from DataDog/checksd_mongo
Browse files Browse the repository at this point in the history
Moving MongoDB to checks.d - fixes #387
  • Loading branch information
remh committed Mar 11, 2013
2 parents 4b1a7d6 + c990e64 commit b9f17ab
Show file tree
Hide file tree
Showing 5 changed files with 394 additions and 256 deletions.
234 changes: 234 additions & 0 deletions checks.d/mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
import re
import types
import time
from datetime import datetime

from checks import AgentCheck
from util import get_hostname

# Fallback used when running with pymongo < 2.0, which does not ship
# uri_parser.  This is not the full spec for mongo URIs -- it only
# extracts the username and password.
# http://www.mongodb.org/display/DOCS/connections6
mongo_uri_re = re.compile(
    r'mongodb://(?P<username>[^:@]+):(?P<password>[^:@]+)@.*'
)

class MongoDb(AgentCheck):
    """Agent check reporting MongoDB server, database and replica-set metrics.

    For each configured instance the check connects to the ``admin``
    database, runs ``serverStatus``, ``dbstats`` and (when the node is part
    of a replica set) ``replSetGetStatus``, then submits every value named
    in ``METRICS`` as either a gauge or a rate.  An event is emitted
    whenever the replica-set state differs from the one seen on the
    previous run.
    """

    # Dotted paths into the collected status document, submitted as gauges.
    GAUGES = [
        "indexCounters.btree.missRatio",
        "globalLock.ratio",
        "connections.current",
        "connections.available",
        "mem.resident",
        "mem.virtual",
        "mem.mapped",
        "cursors.totalOpen",
        "cursors.timedOut",
        "uptime",

        "stats.indexes",
        "stats.indexSize",
        "stats.objects",
        "stats.dataSize",
        "stats.storageSize",

        "replSet.health",
        "replSet.state",
        "replSet.replicationLag"
    ]

    # Dotted paths submitted as rates; the metric name gets a "ps" suffix.
    RATES = [
        "indexCounters.btree.accesses",
        "indexCounters.btree.hits",
        "indexCounters.btree.misses",
        "opcounters.insert",
        "opcounters.query",
        "opcounters.update",
        "opcounters.delete",
        "opcounters.getmore",
        "opcounters.command",
        "asserts.regular",
        "asserts.warning",
        "asserts.msg",
        "asserts.user",
        "asserts.rollovers"
    ]

    METRICS = GAUGES + RATES

    # Replica-set member state codes (the "myState" field of
    # replSetGetStatus) mapped to the wording used in events.
    _REPLSET_STATE_NAMES = {
        0: 'Starting Up',
        1: 'Primary',
        2: 'Secondary',
        3: 'Recovering',
        4: 'Fatal',
        5: 'Starting up (forking threads)',
        6: 'Unknown',
        7: 'Arbiter',
        8: 'Down',
        9: 'Rollback',
        10: 'Removed',
    }

    def __init__(self, name, init_config, agentConfig):
        AgentCheck.__init__(self, name, init_config, agentConfig)

        # Last replica-set state observed.  -1 matches no real state, so the
        # first state seen after start-up always produces an event.
        # NOTE(review): this is stored once per check object, not once per
        # configured instance -- with several instances in different states
        # an event would fire on every run.  Confirm whether that matters.
        self._last_state = -1

    def checkLastState(self, state, agentConfig):
        """Emit a state-change event when `state` differs from the last one seen.

        Returns whatever ``create_event`` returns when an event is emitted,
        and None otherwise.
        """
        if self._last_state != state:
            self._last_state = state
            return self.create_event(state, agentConfig)

    def create_event(self, state, agentConfig):
        """Create an event with a message describing the replication
        state of a mongo node.

        `state` is the numeric replica-set state code; `agentConfig` supplies
        the API key and is passed to ``get_hostname``.
        """
        # Codes missing from the table used to be rendered as "None".
        status = self._REPLSET_STATE_NAMES.get(
            state, 'Unknown (state %s)' % (state,))
        hostname = get_hostname(agentConfig)
        msg_title = "%s is %s" % (hostname, status)
        msg = "MongoDB: %s just reported as %s" % (hostname, status)

        self.event({
            'timestamp': int(time.time()),
            'event_type': 'Mongo',
            'api_key': agentConfig['api_key'],
            'msg_title': msg_title,
            'msg_text': msg,
            'host': hostname
        })

    def check(self, instance):
        """Collect and submit the metrics of one configured MongoDB instance.

        `instance` is the instance mapping from the check configuration.  It
        must contain a ``server`` URI (the instance is skipped with a warning
        otherwise) and may contain a list of ``tags``.

        Nothing is returned: values are submitted through ``self.gauge``,
        ``self.rate`` and ``self.event``.

        Raises Exception when pymongo cannot be imported.  Errors raised
        while talking to the server are not caught here.
        """
        if 'server' not in instance:
            self.log.warn("Missing 'server' in mongo config")
            return

        server = instance['server']
        tags = instance.get('tags', [])

        try:
            from pymongo import Connection
        except ImportError:
            self.log.error('mongo.yaml exists but pymongo module can not be imported. Skipping check.')
            raise Exception('Python PyMongo Module can not be imported. Please check the installation instruction on the Datadog Website')

        username, password = self._parse_credentials(server)
        # Never write the password to the logs.
        safe_server = self._redact_password(server)

        do_auth = username is not None and password is not None
        if not do_auth:
            self.log.debug("Mongo: cannot extract username and password from config %s" % safe_server)

        conn = Connection(server)
        try:
            db = conn['admin']
            if do_auth and not db.authenticate(username, password):
                # Best effort: keep going, the commands below may still work.
                self.log.error("Mongo: cannot connect with config %s" % safe_server)

            status = db.command('serverStatus')  # Shorthand for {'serverStatus': 1}
            status['stats'] = db.command('dbstats')
            repl_data = self._get_replset_data(db)
        finally:
            # A new connection is opened on every run; close it explicitly
            # instead of relying on garbage collection.
            conn.disconnect()

        if repl_data is not None:
            self.checkLastState(repl_data['state'], self.agentConfig)
            status['replSet'] = repl_data

        self._submit_metrics(status, tags)

    def _parse_credentials(self, server):
        """Return the ``(username, password)`` found in the `server` URI.

        Either element is None when it cannot be extracted.
        """
        try:
            from pymongo import uri_parser
        except ImportError:
            # uri_parser is pymongo 2.0+; fall back to the module-level regex.
            matches = mongo_uri_re.match(server)
            if matches:
                parsed = matches.groupdict()
            else:
                parsed = {}
        else:
            # Configuration a URL, mongodb://user:pass@server/db
            parsed = uri_parser.parse_uri(server)
        return parsed.get('username'), parsed.get('password')

    @staticmethod
    def _redact_password(server):
        """Return `server` with the password part of its URI masked."""
        return re.sub(r'(://[^:@/]+:)[^@/]+@', r'\1*****@', server)

    def _get_replset_data(self, db):
        """Return replica-set data for the node behind `db`, or None.

        The result holds ``state`` and, when they can be determined,
        ``health`` and ``replicationLag`` (seconds this node is behind the
        primary).  None is returned when ``replSetGetStatus`` yields nothing
        or fails because the server is not part of a replica set.

        See http://www.mongodb.org/display/DOCS/Replica+Set+Commands#ReplicaSetCommands-replSetGetStatus
        """
        import sys
        from pymongo.errors import OperationFailure

        try:
            repl_status = db.command('replSetGetStatus')
        except OperationFailure:
            # sys.exc_info() rather than "except X, e" / "except X as e":
            # it is the only spelling valid on every Python version.
            message = str(sys.exc_info()[1])
            if ("replSetGetStatus" in message
                    or "not running with --replSet" in message):
                return None
            raise

        if not repl_status:
            return None

        # Find nodes: master and current node (ourself).
        primary = None
        current = None
        for member in repl_status.get('members') or []:
            if member.get('self'):
                current = member
            member_state = member.get('state')
            if member_state is not None and int(member_state) == 1:
                primary = member

        data = {}

        # If we have both (and both report an optime) we can compute a lag.
        if (current is not None and primary is not None
                and 'optimeDate' in current and 'optimeDate' in primary):
            # primary - current: positive when this node is behind.
            lag = primary['optimeDate'] - current['optimeDate']
            # Python 2.7 has this built in, python < 2.7 don't...
            if hasattr(lag, 'total_seconds'):
                data['replicationLag'] = lag.total_seconds()
            else:
                data['replicationLag'] = (
                    lag.microseconds
                    + (lag.seconds + lag.days * 24 * 3600) * 10**6
                ) / 10.0**6

        if current is not None:
            data['health'] = current['health']

        data['state'] = repl_status['myState']
        return data

    def _submit_metrics(self, status, tags):
        """Submit every entry of ``METRICS`` that is present in `status`."""
        # Exact type match on purpose: isinstance() would let booleans in.
        numeric_types = (types.IntType, types.LongType, types.FloatType)

        for metric in self.METRICS:
            # Each metric is of the form: x.y.z with z optional
            # and can be found at status[x][y][z].
            value = status
            try:
                for key in metric.split("."):
                    value = value[key]
            except (KeyError, TypeError):
                # Not reported by this server.
                continue

            if type(value) not in numeric_types:
                # Skip just this metric instead of aborting the whole run.
                self.log.warn("Mongo: skipping metric %s, unexpected value type %s" % (metric, type(value)))
                continue

            name = self.normalize(metric.lower(), 'mongodb')
            if metric in self.GAUGES:
                self.gauge(name, value, tags=tags)
            elif metric in self.RATES:
                self.rate(name + "ps", value, tags=tags)

    @staticmethod
    def parse_agent_config(agentConfig):
        """Build a check configuration from the agent configuration.

        Returns False when ``mongodb_server`` is unset or empty, otherwise a
        mapping with a single instance pointing at that server.
        """
        server = agentConfig.get('mongodb_server')
        if not server:
            return False

        return {
            'instances': [{
                'server': server
            }]
        }

# Manual run: load the instances from the local YAML file, run the check
# against each one and print what was collected.
if __name__ == "__main__":
    check, instances = MongoDb.from_yaml('conf.d/mongo.yaml')
    for mongo_instance in instances:
        check.check(mongo_instance)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(check.get_metrics())
10 changes: 0 additions & 10 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from checks.nagios import Nagios
from checks.build import Hudson
from checks.db.mysql import MySql
from checks.db.mongo import MongoDb
from checks.db.mcache import Memcache
from checks.queue import RabbitMq
from checks.ganglia import Ganglia
Expand Down Expand Up @@ -77,7 +76,6 @@ def __init__(self, agentConfig, emitters, systemStats):
}

# Old-style metric checks
self._mongodb = MongoDb(log)
self._mysql = MySql(log)
self._rabbitmq = RabbitMq()
self._ganglia = Ganglia(log)
Expand Down Expand Up @@ -199,7 +197,6 @@ def run(self, checksd=None, start_event=True):
# Run old-style checks
mysqlStatus = self._mysql.check(self.agentConfig)
rabbitmq = self._rabbitmq.check(log, self.agentConfig)
mongodb = self._mongodb.check(self.agentConfig)
gangliaData = self._ganglia.check(self.agentConfig)
cassandraData = self._cassandra.check(log, self.agentConfig)
dogstreamData = self._dogstream.check(self.agentConfig)
Expand All @@ -218,13 +215,6 @@ def run(self, checksd=None, start_event=True):
# RabbitMQ
if rabbitmq:
payload['rabbitMQ'] = rabbitmq

# MongoDB
if mongodb:
if mongodb.has_key('events'):
events['Mongo'] = mongodb['events']['Mongo']
del mongodb['events']
payload['mongoDB'] = mongodb

# dogstream
if dogstreamData:
Expand Down
Loading

0 comments on commit b9f17ab

Please sign in to comment.