Move aggregator into its own repo and add it to the image
This commit is contained in:
parent
713ba3abaf
commit
a1009af61c
14 changed files with 38 additions and 370 deletions
122
util/metrics/metricqueue.py
Normal file
122
util/metrics/metricqueue.py
Normal file
|
@ -0,0 +1,122 @@
|
|||
import datetime
|
||||
import logging
|
||||
import time
|
||||
|
||||
from functools import wraps
|
||||
from Queue import Queue, Full
|
||||
|
||||
from flask import g, request
|
||||
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
# Histogram bucket boundaries (in seconds) used when recording API response times.
API_RESPONSE_TIME_BUCKETS = [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0]
|
||||
|
||||
|
||||
class MetricQueue(object):
    """ Object to which various metrics are written, for distribution to metrics collection
        system(s) such as Prometheus.

        Every exported metric is created through the `prom` factory handed to the constructor
        and is exposed as an attribute of this object. A deprecated in-memory queue is also
        available; it stays disabled until `enable_deprecated` is called.
    """
    def __init__(self, prom):
        """ Creates the exported metrics.

            `prom` must provide `create_histogram`, `create_counter` and `create_gauge`.
        """
        # Define the various exported metrics.
        self.resp_time = prom.create_histogram('response_time', 'HTTP response time in seconds',
                                               labelnames=['endpoint'],
                                               buckets=API_RESPONSE_TIME_BUCKETS)
        self.resp_code = prom.create_counter('response_code', 'HTTP response code',
                                             labelnames=['endpoint', 'code'])
        self.non_200 = prom.create_counter('response_non200', 'Non-200 HTTP response codes',
                                           labelnames=['endpoint'])
        self.multipart_upload_start = prom.create_counter('multipart_upload_start',
                                                          'Multipart upload starts.')
        # This metric used to be registered under the pasted attribute expression
        # 'self._metric_queue.multipart_upload_end'; dots are not valid in Prometheus metric
        # names, and every other metric here uses a plain name.
        self.multipart_upload_end = prom.create_counter('multipart_upload_end',
                                                        'Multipart upload ends.',
                                                        labelnames=['type'])
        self.build_capacity_shortage = prom.create_gauge('build_capacity_shortage',
                                                         'Build capacity shortage.')
        self.percent_building = prom.create_gauge('build_percent_building', 'Percent building.')
        self.build_counter = prom.create_counter('builds', 'Number of builds',
                                                 labelnames=['name'])
        self.ephemeral_build_workers = prom.create_counter(
            'ephemeral_build_workers', 'Number of started ephemeral build workers',
            labelnames=['name', 'build_uuid'])
        self.ephemeral_build_worker_failure = prom.create_counter(
            'ephemeral_build_worker_failure', 'Number of failed-to-start ephemeral build workers',
            labelnames=['build_uuid'])

        self.work_queue_running = prom.create_gauge('work_queue_running',
                                                    'Running items in a queue',
                                                    labelnames=['queue_name'])
        self.work_queue_available = prom.create_gauge('work_queue_available',
                                                      'Available items in a queue',
                                                      labelnames=['queue_name'])

        # Deprecated: Define an in-memory queue for reporting metrics to CloudWatch or another
        # provider. It is None until enable_deprecated() is called.
        self._queue = None

    def enable_deprecated(self, maxsize=10000):
        """ Turns on the deprecated in-memory queue, bounded to `maxsize` entries. """
        self._queue = Queue(maxsize)

    def put_deprecated(self, name, value, **kwargs):
        """ Enqueues `(name, value, kwargs)` on the deprecated queue without blocking.

            `timestamp` (current local time) and `dimensions` (empty dict) are filled into
            `kwargs` when the caller did not supply them. If the queue is not enabled the metric
            is only debug-logged; if the queue is full the metric is dropped and an error is
            logged.
        """
        if self._queue is None:
            logger.debug('No metric queue %s %s %s', name, value, kwargs)
            return

        kwargs.setdefault('timestamp', datetime.datetime.now())
        kwargs.setdefault('dimensions', {})

        # Only put_nowait can raise Full, so keep the try body to that single call.
        try:
            self._queue.put_nowait((name, value, kwargs))
        except Full:
            logger.error('Metric queue full')

    def get_deprecated(self):
        """ Returns the next `(name, value, kwargs)` tuple, blocking until one is available.

            Raises AttributeError if the queue was never enabled via `enable_deprecated`.
        """
        return self._queue.get()

    def get_nowait_deprecated(self):
        """ Returns the next `(name, value, kwargs)` tuple without blocking.

            Propagates the queue's Empty exception when nothing is queued, and raises
            AttributeError if the queue was never enabled via `enable_deprecated`.
        """
        return self._queue.get_nowait()
|
||||
|
||||
|
||||
def time_decorator(name, metric_queue):
    """ Builds a decorator which times calls to an endpoint function and reports the
        measurement to `metric_queue` under `name`.
    """
    # The reporting hook is created once and shared by every function this decorator wraps.
    report = _time_after_request(name, metric_queue)

    def decorator(func):
        @wraps(func)
        def timed(*args, **kwargs):
            _time_before_request()
            response = func(*args, **kwargs)
            report(response)
            return response
        return timed

    return decorator
|
||||
|
||||
|
||||
def time_blueprint(bp, metric_queue):
    """ Registers before/after request hooks on the blueprint `bp` so that its request times
        are logged to the metrics queue, using the blueprint's name as the metric name.
    """
    bp.before_request(_time_before_request)
    report_hook = _time_after_request(bp.name, metric_queue)
    bp.after_request(report_hook)
|
||||
|
||||
|
||||
def _time_before_request():
    """ Stores the current time on flask's `g` so the after-request hook can compute the
        request duration.
    """
    started_at = time.time()
    g._request_start_time = started_at
|
||||
|
||||
|
||||
def _time_after_request(name, metric_queue):
    """ Returns an after-request hook that reports response time and status code metrics for
        the current request to `metric_queue`. The hook always returns the response it is given.
    """
    def record(response):
        started_at = getattr(g, '_request_start_time', None)

        # Without a recorded start time there is nothing to measure; pass the response through.
        if started_at is not None:
            elapsed = time.time() - started_at
            endpoint = request.endpoint
            status = response.status_code
            dimensions = {'endpoint': endpoint}

            # Deprecated queue-based reporting.
            metric_queue.put_deprecated('ResponseTime', elapsed, dimensions=dimensions,
                                        unit='Seconds')
            metric_queue.put_deprecated('ResponseCode', status, dimensions=dimensions)

            # Directly exported metrics.
            metric_queue.resp_time.Observe(elapsed, labelvalues=[endpoint])
            metric_queue.resp_code.Inc(labelvalues=[endpoint, status])

            if status >= 500:
                metric_queue.put_deprecated('5XXResponse', 1, dimensions={'name': name})
            elif not 200 <= status < 300:
                metric_queue.put_deprecated('Non200Response', 1, dimensions={'name': name})
                metric_queue.non_200.Inc(labelvalues=[endpoint])

        return response

    return record
|
||||
|
Reference in a new issue