|
|
|
import datetime
|
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
|
|
|
|
from Queue import Queue, Full, Empty
|
|
|
|
from threading import Thread
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level logger for this plugin.
logger = logging.getLogger(__name__)

# Maximum number of metric calls buffered for the sender thread; once the
# queue is full, further non-register calls are silently dropped (see
# Prometheus.enqueue).
QUEUE_MAX = 1000

# Maximum number of queued calls sent to the aggregator in a single POST.
MAX_BATCH_SIZE = 100

# Minimum interval between collector re-registration attempts after the
# aggregator responds with a 500 (see _QueueSender.run).
REGISTER_WAIT = datetime.timedelta(hours=1)
|
|
|
|
|
|
|
|
class PrometheusPlugin(object):
    """ Application plugin for reporting metrics to Prometheus. """

    def __init__(self, app=None):
        self.app = app
        self.state = None if app is None else self.init_app(app)

    def init_app(self, app):
        """ Builds a Prometheus aggregator from the app's config and registers it
            as the 'prometheus' extension on the app. Returns the aggregator. """
        aggregator_url = app.config.get('PROMETHEUS_AGGREGATOR_URL')
        metric_namespace = app.config.get('PROMETHEUS_NAMESPACE')

        logger.debug('Initializing prometheus with aggregator url: %s', aggregator_url)
        state = Prometheus(aggregator_url, metric_namespace)

        # register extension with app
        if not hasattr(app, 'extensions'):
            app.extensions = {}
        app.extensions['prometheus'] = state
        return state

    def __getattr__(self, name):
        # Proxy unknown attribute lookups to the underlying Prometheus state;
        # yields None when the plugin was never initialized with an app.
        return getattr(self.state, name, None)
|
2016-02-01 20:07:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Prometheus(object):
    """ Aggregator for collecting stats that are reported to Prometheus.

    When no aggregator `url` is configured, the queue is never created and all
    metric calls are dropped (metrics effectively disabled).
    """

    def __init__(self, url=None, namespace=None):
        # JSON-serialized 'register' calls, shared with the sender thread so it
        # can re-register all known collectors if the aggregator restarts.
        self._metric_collectors = []
        self._url = url
        self._namespace = namespace or ''

        if url is not None:
            self._queue = Queue(QUEUE_MAX)
            self._sender = _QueueSender(self._queue, url, self._metric_collectors)
            self._sender.start()
            logger.debug('Prometheus aggregator sending to %s', url)
        else:
            self._queue = None
            logger.debug('Prometheus aggregator disabled')

    def enqueue(self, call, data):
        """ Serializes a (call, data) pair and hands it to the sender thread.

        'register' calls are remembered locally (for later re-registration)
        instead of being queued; all other calls are queued best-effort.
        """
        # Explicit identity check: a Queue object's truthiness is not defined by
        # its contents, so `not self._queue` only worked by accident. Compare
        # against None to express the actual intent (aggregator disabled).
        if self._queue is None:
            return

        v = json.dumps({
            'Call': call,
            'Data': data,
        })

        if call == 'register':
            self._metric_collectors.append(v)
            return

        try:
            self._queue.put_nowait(v)
        except Full:
            # If the queue is full, it is because 1) no aggregator was enabled or 2)
            # the aggregator is taking a long time to respond to requests. In the case
            # of 1, it's probably enterprise mode and we don't care. In the case of 2,
            # the response timeout error is printed inside the queue handler. In either case,
            # we don't need to print an error here.
            pass

    def create_gauge(self, *args, **kwargs):
        return self._create_collector('Gauge', args, kwargs)

    def create_counter(self, *args, **kwargs):
        return self._create_collector('Counter', args, kwargs)

    def create_summary(self, *args, **kwargs):
        return self._create_collector('Summary', args, kwargs)

    def create_histogram(self, *args, **kwargs):
        return self._create_collector('Histogram', args, kwargs)

    def create_untyped(self, *args, **kwargs):
        return self._create_collector('Untyped', args, kwargs)

    def _create_collector(self, collector_type, args, kwargs):
        """ Builds a _Collector of the given type, defaulting the metric
            namespace to this aggregator's configured namespace. """
        kwargs.setdefault('namespace', self._namespace)
        return _Collector(self.enqueue, collector_type, *args, **kwargs)
|
2016-02-01 20:07:46 +00:00
|
|
|
|
|
|
|
|
2016-06-28 18:36:17 +00:00
|
|
|
class _QueueSender(Thread):
|
|
|
|
""" Helper class which uses a thread to asynchronously send metrics to the local Prometheus
|
|
|
|
aggregator. """
|
2016-07-01 18:16:15 +00:00
|
|
|
def __init__(self, queue, url, metric_collectors):
|
2016-06-28 18:36:17 +00:00
|
|
|
Thread.__init__(self)
|
|
|
|
self.daemon = True
|
|
|
|
self.next_register = datetime.datetime.now()
|
|
|
|
self._queue = queue
|
|
|
|
self._url = url
|
2016-07-01 18:16:15 +00:00
|
|
|
self._metric_collectors = metric_collectors
|
2016-02-01 20:07:46 +00:00
|
|
|
|
2016-06-28 18:36:17 +00:00
|
|
|
def run(self):
|
|
|
|
while True:
|
|
|
|
reqs = []
|
|
|
|
reqs.append(self._queue.get())
|
2016-02-01 20:07:46 +00:00
|
|
|
|
2016-06-28 18:36:17 +00:00
|
|
|
while len(reqs) < MAX_BATCH_SIZE:
|
|
|
|
try:
|
|
|
|
req = self._queue.get_nowait()
|
|
|
|
reqs.append(req)
|
|
|
|
except Empty:
|
|
|
|
break
|
|
|
|
|
|
|
|
try:
|
|
|
|
resp = requests.post(self._url + '/call', '\n'.join(reqs))
|
|
|
|
if resp.status_code == 500 and self.next_register <= datetime.datetime.now():
|
2016-07-01 18:16:15 +00:00
|
|
|
resp = requests.post(self._url + '/call', '\n'.join(self._metric_collectors))
|
2016-06-28 18:36:17 +00:00
|
|
|
self.next_register = datetime.datetime.now() + REGISTER_WAIT
|
|
|
|
logger.debug('Register returned %s for %s metrics; setting next to %s', resp.status_code,
|
2016-07-01 18:16:15 +00:00
|
|
|
len(self._metric_collectors), self.next_register)
|
2016-06-28 18:36:17 +00:00
|
|
|
elif resp.status_code != 200:
|
|
|
|
logger.debug('Failed sending to prometheus: %s: %s: %s', resp.status_code, resp.text,
|
|
|
|
', '.join(reqs))
|
|
|
|
else:
|
|
|
|
logger.debug('Sent %d prometheus metrics', len(reqs))
|
|
|
|
except:
|
|
|
|
logger.exception('Failed to write to prometheus aggregator: %s', reqs)
|
2016-02-01 20:07:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
class _Collector(object):
|
2016-06-28 18:36:17 +00:00
|
|
|
""" Collector for a Prometheus metric. """
|
2016-07-01 18:16:15 +00:00
|
|
|
def __init__(self, enqueue_method, collector_type, collector_name, collector_help,
|
|
|
|
namespace='', subsystem='', **kwargs):
|
2016-02-01 20:07:46 +00:00
|
|
|
self._enqueue_method = enqueue_method
|
|
|
|
self._base_args = {
|
2016-07-01 18:16:15 +00:00
|
|
|
'Name': collector_name,
|
2016-02-01 20:07:46 +00:00
|
|
|
'Namespace': namespace,
|
|
|
|
'Subsystem': subsystem,
|
2016-07-01 18:16:15 +00:00
|
|
|
'Type': collector_type,
|
2016-02-01 20:07:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
registration_params = dict(kwargs)
|
|
|
|
registration_params.update(self._base_args)
|
2016-07-01 18:16:15 +00:00
|
|
|
registration_params['Help'] = collector_help
|
2016-02-01 20:07:46 +00:00
|
|
|
|
|
|
|
self._enqueue_method('register', registration_params)
|
|
|
|
|
|
|
|
def __getattr__(self, method):
|
|
|
|
def f(value=0, labelvalues=()):
|
|
|
|
data = dict(self._base_args)
|
|
|
|
data.update({
|
|
|
|
'Value': value,
|
|
|
|
'LabelValues': [str(i) for i in labelvalues],
|
|
|
|
'Method': method,
|
|
|
|
})
|
2016-06-28 18:36:17 +00:00
|
|
|
|
2016-02-01 20:07:46 +00:00
|
|
|
self._enqueue_method('put', data)
|
2016-06-28 18:36:17 +00:00
|
|
|
|
2016-02-01 20:07:46 +00:00
|
|
|
return f
|