Skip to content

Commit

Permalink
mongo db to checks.d
Browse files Browse the repository at this point in the history
  • Loading branch information
pcockwell committed Mar 7, 2013
1 parent bca787b commit e0412d7
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 234 deletions.
202 changes: 202 additions & 0 deletions checks.d/mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import re

This comment has been minimized.

Copy link
@clutchski

clutchski Mar 7, 2013

Contributor

awesome. this is great. thanks for doing this.

This comment has been minimized.

Copy link
@remh

remh Mar 7, 2013

+1 👍

import types
from datetime import datetime

from checks import AgentCheck

# Fallback used when pymongo < 2.0 is installed (no pymongo.uri_parser).
# This is not the full spec for mongo URIs -- it only pulls out the
# username and password.
# http://www.mongodb.org/display/DOCS/connections6
mongo_uri_re = re.compile(
    r'mongodb://(?P<username>[^:@]+):(?P<password>[^:@]+)@.*'
)

class MongoDb(AgentCheck):

# Metrics submitted as gauges. Each name is a dotted path into the
# status document assembled by check().
GAUGES = [
    # read from the serverStatus document
    "indexCounters.btree.missRatio",
    "globalLock.ratio",
    "connections.current",
    "connections.available",
    "mem.resident",
    "mem.virtual",
    "mem.mapped",
    "cursors.totalOpen",
    "cursors.timedOut",
    "uptime",

    # read from the dbstats document (stored under the 'stats' key)
    "stats.indexes",
    "stats.indexSize",
    "stats.objects",
    "stats.dataSize",
    "stats.storageSize",

    # derived from replSetGetStatus (stored under the 'replSet' key)
    "replSet.health",
    "replSet.state",
    "replSet.replicationLag",
]

# Metrics submitted as rates, using the same dotted-path convention.
RATES = [
    "indexCounters.btree.accesses",
    "indexCounters.btree.hits",
    "indexCounters.btree.misses",
    "opcounters.insert",
    "opcounters.query",
    "opcounters.update",
    "opcounters.delete",
    "opcounters.getmore",
    "opcounters.command",
    "asserts.regular",
    "asserts.warning",
    "asserts.msg",
    "asserts.user",
    "asserts.rollovers",
]

# Every metric the check looks for: gauges first, then rates.
METRICS = GAUGES + RATES

def __init__(self, name, init_config, agentConfig):
    """Initialise the base check and the replica-state tracker."""
    AgentCheck.__init__(self, name, init_config, agentConfig)

    # -1 is not one of the replica states described by create_event
    # (0 through 9), so the first state passed to checkLastState is
    # always treated as a change.
    self._last_state = -1

def checkLastState(self, state, agentConfig, serverVersion):
    """Return an event when the replica state differs from the last one seen.

    The new state is remembered before the event is built. When the
    state is unchanged, nothing is recorded and None is returned.
    """
    if self._last_state == state:
        return None

    self._last_state = state
    return self.create_event(state, agentConfig, serverVersion)

def create_event(self, state, agentConfig, serverVersion):
    """Create an event with a message describing the replication
    state of a mongo node.

    state         -- numeric replica-set member state (myState).
    agentConfig   -- agent configuration dict; 'api_key' is read from it
                     and it is passed to gethostname().
    serverVersion -- version string of the mongo server.

    Returns a dict with the keys 'timestamp', 'event_type', 'host',
    'api_key', 'version' and 'state'. 'state' is None when the numeric
    state is not one of the known values 0-9.
    """
    # `time` is not imported at module level in this file, so the
    # original body raised NameError here; import it locally.
    import time

    # NOTE(review): gethostname is not imported anywhere in the visible
    # part of this file either -- confirm where it lives and import it,
    # otherwise this call still fails with NameError.
    state_descriptions = {
        0: 'Starting Up',
        1: 'Primary',
        2: 'Secondary',
        3: 'Recovering',
        4: 'Fatal',
        5: 'Starting up (forking threads)',
        6: 'Unknown',
        7: 'Arbiter',
        8: 'Down',
        9: 'Rollback',
    }

    return {
        'timestamp': int(time.mktime(datetime.now().timetuple())),
        'event_type': 'Mongo',
        'host': gethostname(agentConfig),
        'api_key': agentConfig['api_key'],
        'version': serverVersion,
        'state': state_descriptions.get(state),
    }

def check(self, instance):
    """Collect server, database and replica-set metrics for one instance.

    Connects to instance['mongodb_server'], runs the serverStatus,
    dbstats and replSetGetStatus commands against the 'admin' database,
    merges the results into a single status document and submits every
    metric from GAUGES / RATES that is present in it.

    instance -- instance configuration dict. 'mongodb_server' (a
                mongodb:// URI) is required; 'tags' (a list) is optional.

    Returns None. Metrics are submitted through self.gauge / self.rate.
    Raises ImportError when pymongo is not installed.
    """
    if 'mongodb_server' not in instance:
        self.log.warn("Missing 'mongodb_server' in mongo config")
        return

    server = instance['mongodb_server']
    tags = instance.get('tags', [])

    # Import lazily so that a missing driver produces an actionable error
    # instead of a bare "No module named pymongo".
    try:
        from pymongo import Connection
    except ImportError:
        raise ImportError("Python pymongo module can not be imported. "
                          "Please check that pymongo is installed.")

    try:
        from pymongo import uri_parser
        # Configuration is a URL, mongodb://user:pass@server/db
        parsed = uri_parser.parse_uri(server)
    except ImportError:
        # uri_parser is pymongo 2.0+; fall back to the module-level regex
        matches = mongo_uri_re.match(server)
        if matches:
            parsed = matches.groupdict()
        else:
            parsed = {}
    username = parsed.get('username')
    password = parsed.get('password')

    # NOTE(review): do_auth is computed but never acted on in this method;
    # confirm whether an explicit authentication step is still required.
    do_auth = username is not None and password is not None

    conn = Connection(server)
    db = conn['admin']

    status = db.command('serverStatus')  # Shorthand for {'serverStatus': 1}
    status['stats'] = db.command('dbstats')

    # NOTE(review): events are stored in this dict, but it is never
    # returned or submitted -- confirm how events should be reported.
    results = {}

    # Handle replica data, if any
    # See http://www.mongodb.org/display/DOCS/Replica+Set+Commands#ReplicaSetCommands-replSetGetStatus
    try:
        data = {}

        replSet = db.command('replSetGetStatus')
        serverVersion = conn.server_info()['version']
        if replSet:
            primary = None
            current = None

            # find nodes: master and current node (ourself)
            for member in replSet.get('members') or []:
                if member.get('self'):
                    current = member
                if int(member.get('state')) == 1:
                    primary = member

            # If we have both we can compute a lag time
            if current is not None and primary is not None:
                lag = current['optimeDate'] - primary['optimeDate']
                # Python 2.7 has this built in, python < 2.7 doesn't
                if hasattr(lag, 'total_seconds'):
                    data['replicationLag'] = lag.total_seconds()
                else:
                    data['replicationLag'] = (
                        lag.microseconds +
                        (lag.seconds + lag.days * 24 * 3600) * 10**6
                    ) / 10.0**6

            if current is not None:
                data['health'] = current['health']

            data['state'] = replSet['myState']
            event = self.checkLastState(data['state'], self.agentConfig,
                                        serverVersion)
            if event is not None:
                results['events'] = {'Mongo': [event]}
            status['replSet'] = data
    except Exception:
        # Replica metrics stay best-effort (e.g. the server is not part of
        # a replica set), but interrupts are no longer swallowed and the
        # failure is logged instead of silently discarded.
        self.log.debug("Unable to collect mongo replica set status",
                       exc_info=True)

    # If these keys exist, remove them for now as they cannot be serialized
    status.get('backgroundFlushing', {}).pop('last_finished', None)
    status.pop('localTime', None)

    # Walk each known metric path through the status document and submit
    # the value as a gauge and/or a rate.
    numeric_types = (types.IntType, types.LongType, types.FloatType)
    for m in self.METRICS:
        # each metric is of the form: x.y.z with z optional
        # and can be found at status[x][y][z]
        value = status
        try:
            for c in m.split("."):
                value = value[c]
        except (KeyError, TypeError):
            # path is absent, or an intermediate node is not a mapping
            continue

        # value is now status[x][y][z]; skip anything that is not a plain
        # number instead of aborting the whole run with an assert (which
        # is also stripped under -O).
        if type(value) not in numeric_types:
            self.log.warn("Skipping non-numeric value for mongo metric %s: %r",
                          m, value)
            continue

        if m in self.GAUGES:
            self.gauge(m, value, tags=tags)

        if m in self.RATES:
            self.rate(m, value, tags=tags)

This comment has been minimized.

Copy link
@remh

remh Mar 7, 2013

Can you add a parse_agent_config method here similar to:
https://github.com/DataDog/dd-agent/blob/master/checks.d/haproxy.py#L260-L271

Then if the configuration is set in datadog.conf the check would still work.

# Manual entry point: load the check and its instances from
# conf.d/mongo.yaml, run the check for each instance and print the
# collected metrics.
# NOTE(review): indentation was lost in this copy, so it is unclear whether
# the print belongs inside the for loop or after it -- confirm against the
# real file.
if __name__ == "__main__":
check, instances = MongoDb.from_yaml('conf.d/mongo.yaml')
for instance in instances:
check.check(instance)
print check.get_metrics()
10 changes: 0 additions & 10 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from checks.nagios import Nagios
from checks.build import Hudson
from checks.db.mysql import MySql
from checks.db.mongo import MongoDb
from checks.db.couch import CouchDb
from checks.db.mcache import Memcache
from checks.queue import RabbitMq
Expand Down Expand Up @@ -80,7 +79,6 @@ def __init__(self, agentConfig, emitters, systemStats):

# Old-style metric checks
self._couchdb = CouchDb(log)
self._mongodb = MongoDb(log)
self._mysql = MySql(log)
self._rabbitmq = RabbitMq()
self._ganglia = Ganglia(log)
Expand Down Expand Up @@ -200,7 +198,6 @@ def run(self, checksd=None):
# Run old-style checks
mysqlStatus = self._mysql.check(self.agentConfig)
rabbitmq = self._rabbitmq.check(log, self.agentConfig)
mongodb = self._mongodb.check(self.agentConfig)
couchdb = self._couchdb.check(self.agentConfig)
gangliaData = self._ganglia.check(self.agentConfig)
cassandraData = self._cassandra.check(log, self.agentConfig)
Expand All @@ -220,13 +217,6 @@ def run(self, checksd=None):
# RabbitMQ
if rabbitmq:
payload['rabbitMQ'] = rabbitmq

# MongoDB
if mongodb:
if mongodb.has_key('events'):
events['Mongo'] = mongodb['events']['Mongo']
del mongodb['events']
payload['mongoDB'] = mongodb

# CouchDB
if couchdb:
Expand Down
Loading

0 comments on commit e0412d7

Please sign in to comment.