Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mcache checksd fixes #390 #439

Merged
merged 23 commits into from
Apr 19, 2013
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
65a0d05
initial memcache to checks.d
pcockwell Apr 3, 2013
9db083d
Merge branch 'master' into mcache_checksd
pcockwell Apr 5, 2013
07d462a
memcache to checksd with tests working
pcockwell Apr 5, 2013
fde9796
removed old mcache check file
pcockwell Apr 5, 2013
ec0691b
removed memcache from collector.py
pcockwell Apr 5, 2013
ffebdb2
removing old memcache code
pcockwell Apr 5, 2013
9739581
uses exceptions and added test for bad hosts
pcockwell Apr 5, 2013
8496938
removed lots of unneeded debug lines
pcockwell Apr 5, 2013
ec1754b
fix in collector.py re-adding the self._metrics_checks variable
pcockwell Apr 8, 2013
a35de0e
read metrics to remove mem leak issue
pcockwell Apr 9, 2013
1419ec3
add sleep to make sure flush happens
pcockwell Apr 9, 2013
43f2eff
skip MemLeak test
pcockwell Apr 9, 2013
981d86f
fixed nitpicks and cleaned up a few tests
pcockwell Apr 9, 2013
4498b1f
closer look into garbage on travis
pcockwell Apr 10, 2013
a4bf6e9
removed some debug code - running travis test again
pcockwell Apr 10, 2013
afe0c3e
remove unneeded comments - MemLeak test no longer fails, fixes #438
pcockwell Apr 10, 2013
471a9c5
multi-tag support
pcockwell Apr 10, 2013
7eb4848
Merge branch 'master' of github.com:DataDog/dd-agent into mcache_checksd
pcockwell Apr 12, 2013
a85e523
rework to multi-tagging
pcockwell Apr 12, 2013
1158c3e
updated comments in example yaml file
pcockwell Apr 17, 2013
74bc9d0
remove memcached config from datadog.conf.example
pcockwell Apr 18, 2013
1c70afe
Merge branch 'master' of github.com:DataDog/dd-agent into mcache_checksd
pcockwell Apr 18, 2013
cb49da4
removed unneeded constructor and useless suffixes in rate dictionary
pcockwell Apr 19, 2013
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 114 additions & 122 deletions checks/db/mcache.py → checks.d/mcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,153 +54,106 @@
# http://www.couchbase.org/wiki/display/membase/Membase+Statistics
# https://github.com/membase/ep-engine/blob/master/docs/stats.org

class Memcache(Check):
class Memcache(AgentCheck):
DEFAULT_PORT = 11211

def __init__(self, logger):
Check.__init__(self, logger)
self.gauge("memcache.total_items")
self.gauge("memcache.curr_items")
self.gauge("memcache.limit_maxbytes")
self.gauge("memcache.uptime")
self.gauge("memcache.bytes")
self.gauge("memcache.curr_connections")
self.gauge("memcache.connection_structures")
self.gauge("memcache.threads")
self.gauge("memcache.pointer_size")

# these two are calculated from other metrics
self.gauge("memcache.get_hit_percent")
self.gauge("memcache.avg_item_size")
self.gauge("memcache.fill_percent")

self.counter("memcache.rusage_user_rate")
self.counter("memcache.rusage_system_rate")
self.counter("memcache.cmd_get_rate")
self.counter("memcache.cmd_set_rate")
self.counter("memcache.cmd_flush_rate")
self.counter("memcache.get_hits_rate")
self.counter("memcache.get_misses_rate")
self.counter("memcache.evictions_rate")
self.counter("memcache.bytes_read_rate")
self.counter("memcache.bytes_written_rate")
self.counter("memcache.total_connections_rate")

def _load_conf(self, agentConfig):

# Load the conf according to the old schema
memcache_url = agentConfig.get("memcache_server", None)
memcache_port = agentConfig.get("memcache_port", None)
memcache_urls = []
memcache_ports = []
tags = []
if memcache_url is not None:
memcache_urls.append(memcache_url)
memcache_ports.append(memcache_port)
tags.append(None)


# Load the conf according to the new schema
#memcache_instance_1: first_host:first_port:first_tag
#memcache_instance_2: second_host:second_port:second_tag
#memcache_instance_3: third_host:third_port:third_tag
index = 1
instance = agentConfig.get("memcache_instance_%s" % index, None)
while instance:
instance = instance.split(":")
memcache_urls.append(instance[0])
if len(instance)>1:
memcache_ports.append(instance[1])
else:
memcache_ports.append(self.DEFAULT_PORT)
if len(instance)==3:
tags.append(instance[2])
else:
tags.append(None)
index = index + 1
instance = agentConfig.get("memcache_instance_%s" % index, None)

return (memcache_urls, memcache_ports, tags)
GAUGES = [
"total_items",
"curr_items",
"limit_maxbytes",
"uptime",
"bytes",
"curr_connections",
"connection_structures",
"threads",
"pointer_size"
]

RATES = [
"rusage_user_rate",
"rusage_system_rate",
"cmd_get_rate",
"cmd_set_rate",
"cmd_flush_rate",
"get_hits_rate",
"get_misses_rate",
"evictions_rate",
"bytes_read_rate",
"bytes_written_rate",
"total_connections_rate"
]

def __init__(self, name, init_config, agentConfig):
AgentCheck.__init__(self, name, init_config, agentConfig)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need a constructor if all you do is call the super constructor.


def _get_metrics(self, server, port, tags, memcache):
mc = None # client
mc = None # client
try:
self.logger.debug("Connecting to %s:%s tags:%s", server, port, tags)
self.log.debug("Connecting to %s:%s tags:%s", server, port, tags)
mc = memcache.Client(["%s:%d" % (server, port)])
raw_stats = mc.get_stats()

assert len(raw_stats) == 1 and len(raw_stats[0]) == 2, "Malformed response: %s" % raw_stats
# Access the dict
stats = raw_stats[0][1]
for metric in stats:
self.logger.debug("Processing %s: %s", metric, stats[metric])
# Check if metric is a gauge or rate
if metric in self.GAUGES:
our_metric = self.normalize(metric.lower(), 'memcache')
self.gauge(our_metric, float(stats[metric]), tags=tags)

our_metric = "memcache." + metric
# Tweak the name if it's a counter so that we don't use the exact
# Tweak the name if it's a rate so that we don't use the exact
# same metric name as the memcache documentation
if self.is_counter(our_metric + "_rate"):
our_metric = our_metric + "_rate"

if self.is_metric(our_metric):
self.save_sample(our_metric, float(stats[metric]), tags=tags)
self.logger.debug("Saved %s: %s", our_metric, stats[metric])
except ValueError:
self.logger.exception("Cannot convert port value; check your configuration")
except CheckException:
self.logger.exception("Cannot save sampled data")
except Exception:
self.logger.exception("Cannot get data from memcache")
if metric + "_rate" in self.RATES:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should get rid of the _rate suffix in the dictionary as you don't use the value in the dictionary to create the metric name.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And obviously remove it in the RATES dictionary.

our_metric = self.normalize(metric.lower() + "_rate", 'memcache')
self.rate(our_metric, float(stats[metric]), tags=tags)

if mc is not None:
# calculate some metrics based on other metrics.
# stats should be present, but wrap in try/except
# and log an exception just in case.
try:
self.save_sample(
self.gauge(
"memcache.get_hit_percent",
100.0 * float(stats["get_hits"]) / float(stats["cmd_get"]),
tags=tags,
)
except ZeroDivisionError:
pass
except Exception:
self.logger.exception("Cannot calculate memcache.get_hit_percent for tags: %s", tags)

try:
self.save_sample(
self.gauge(
"memcache.fill_percent",
100.0 * float(stats["bytes"]) / float(stats["limit_maxbytes"]),
tags=tags,
)
except ZeroDivisionError:
pass
except Exception:
self.logger.exception("Cannot calculate memcache.fill_percent for tags: %s", tags)


try:
self.save_sample(
self.gauge(
"memcache.avg_item_size",
float(stats["bytes"]) / float(stats["curr_items"]),
tags=tags,
)
except ZeroDivisionError:
pass
except Exception:
self.logger.exception("Cannot calculate memcache.avg_item_size for tags: %s", tags)

except AssertionError:
raise Exception("Unable to retrieve stats from memcache instance: " + server + ":" + str(port) + ". Please check your configuration")

if mc is not None:
mc.disconnect_all()
self.logger.debug("Disconnected from memcached")
self.log.debug("Disconnected from memcached")
del mc

def check(self, agentConfig):
(memcache_urls, memcache_ports, tags) = self._load_conf(agentConfig)
if len(memcache_urls) == 0:
return False
try:
def check(self, instance):
server = instance.get('url', None)
if not server:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should raise an Exception here instead.

raise Exception("Missing or null 'url' in mcache config")

try:
import memcache
except ImportError:
self.logger.exception("Cannot import python-based memcache driver")
return False
raise Exception("Cannot import memcache module. Check the instructions to install this module at https://app.datadoghq.com/account/settings#integrations/mcache")

# Hacky monkeypatch to fix a memory leak in the memcache library.
# See https://github.com/DataDog/dd-agent/issues/278 for details.
Expand All @@ -209,23 +162,62 @@ def check(self, agentConfig):
except:
pass

for i in range(len(memcache_urls)):
server = memcache_urls[i]
if server is None:
continue
if memcache_ports[i] is None:
memcache_ports[i] = 11211
port = int(memcache_ports[i])

if tags[i] is not None:
tag = ["instance:%s" % tags[i]]
else:
tag=["instance:%s_%s" % (server, port)]

self._get_metrics(server, port, tag, memcache)

metrics = self.get_metrics()
self.logger.debug("Memcache samples: %s", metrics)
return metrics


port = int(instance.get('port', self.DEFAULT_PORT))

tags = instance.get('tag', None)
if tags:
tags = ["instance:%s" % tags]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're only allowing a single tag per instance here? Shouldn't we allow multiple tags?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add that functionality. It wasn't in the previous check version. I assume the tags should each be of the 'instance:tag1', 'instance:tag2' format?

else:
tags = ["instance:%s_%s" % (server, port)]

self._get_metrics(server, port, tags, memcache)

@staticmethod
def parse_agent_config(agentConfig):
    """Translate legacy datadog.conf memcache settings into instance dicts.

    Two legacy layouts are read from ``agentConfig``:

    * old schema: ``memcache_server`` plus an optional ``memcache_port``
      (falls back to ``Memcache.DEFAULT_PORT``);
    * new schema: consecutively numbered ``memcache_instance_<n>`` keys,
      starting at 1, each formatted as ``host[:port[:tag]]``.

    Returns ``{'instances': [...]}`` where every entry carries the keys
    ``url``, ``port`` and ``tag``, or ``False`` when neither schema
    yields an instance.
    """
    collected = []

    # Old schema: one server described by two flat keys.
    legacy_url = agentConfig.get("memcache_server", None)
    legacy_port = agentConfig.get("memcache_port", Memcache.DEFAULT_PORT)
    if legacy_url is not None:
        collected.append({
            'url': legacy_url,
            'port': legacy_port,
            'tag': None
        })

    # New schema, one key per instance:
    #   memcache_instance_1: first_host:first_port:first_tag
    #   memcache_instance_2: second_host:second_port:second_tag
    #   memcache_instance_3: third_host:third_port:third_tag
    # Scanning stops at the first missing or empty index.
    position = 1
    while True:
        raw = agentConfig.get("memcache_instance_%s" % position, None)
        if not raw:
            break

        fields = raw.split(":")
        field_count = len(fields)

        # The port is whatever follows the first colon; it is kept as the
        # raw string found in the configuration.
        if field_count > 1:
            port = fields[1]
        else:
            port = Memcache.DEFAULT_PORT

        # A tag is only recognised when there are exactly three fields.
        if field_count == 3:
            tag = fields[2]
        else:
            tag = None

        collected.append({
            'url': fields[0],
            'port': port,
            'tag': tag
        })
        position += 1

    if not collected:
        return False

    return {
        'instances': collected
    }
6 changes: 1 addition & 5 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import checks.system.win32 as w32
from checks.agent_metrics import CollectorMetrics
from checks.nagios import Nagios
from checks.db.mcache import Memcache
from checks.ganglia import Ganglia
from checks.cassandra import Cassandra
from checks.datadog import Dogstreams, DdForwarder
Expand Down Expand Up @@ -81,10 +80,7 @@ def __init__(self, agentConfig, emitters, systemStats):
# Agent Metrics
self._agent_metrics = CollectorMetrics(log)

# Metric Checks
self._metrics_checks = [
Memcache(log),
]
self._metrics_checks = []

# Custom metric checks
for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
Expand Down
6 changes: 6 additions & 0 deletions conf.d/mcache.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
init_config:

instances:
# - url: localhost
# port: 11211
# tag: optional_tag
Loading