Add duration metric collector decorator (#1885)
Track time-to-start for builders. Track time-to-build for builders. Track EC2 builder fallbacks. Track build time.
This commit is contained in:
parent
85d611e2fb
commit
832ee89923
5 changed files with 110 additions and 5 deletions
|
@ -4,6 +4,7 @@ import uuid
|
|||
import calendar
|
||||
import os.path
|
||||
import json
|
||||
import time
|
||||
|
||||
from collections import namedtuple
|
||||
from datetime import datetime, timedelta
|
||||
|
@ -385,7 +386,7 @@ class EphemeralBuilderManager(BaseManager):
|
|||
build_uuid = build_job.job_details['build_uuid']
|
||||
logger.debug('Calling schedule with job: %s', build_uuid)
|
||||
|
||||
# Check if there are worker slots avialable by checking the number of jobs in etcd
|
||||
# Check if there are worker slots available by checking the number of jobs in etcd
|
||||
allowed_worker_count = self._manager_config.get('ALLOWED_WORKER_COUNT', 1)
|
||||
try:
|
||||
active_jobs = yield From(self._etcd_client.read(self._etcd_job_prefix, recursive=True))
|
||||
|
@ -450,6 +451,7 @@ class EphemeralBuilderManager(BaseManager):
|
|||
|
||||
# Check if we can use this executor based on the retries remaining.
|
||||
if executor.minimum_retry_threshold > build_job.retries_remaining:
|
||||
metric_queue.builder_fallback.Inc()
|
||||
logger.debug('Job %s cannot use executor %s as it is below retry threshold %s (retry #%s)',
|
||||
build_uuid, executor.name, executor.minimum_retry_threshold,
|
||||
build_job.retries_remaining)
|
||||
|
@ -499,6 +501,7 @@ class EphemeralBuilderManager(BaseManager):
|
|||
'execution_id': execution_id,
|
||||
'executor_name': started_with_executor.name,
|
||||
'job_queue_item': build_job.job_item,
|
||||
'start_time': time.time(),
|
||||
})
|
||||
|
||||
try:
|
||||
|
@ -534,6 +537,14 @@ class EphemeralBuilderManager(BaseManager):
|
|||
logger.debug('Sending build %s to newly ready component on realm %s',
|
||||
job.build_uuid, build_component.builder_realm)
|
||||
yield From(build_component.start_build(job))
|
||||
|
||||
try:
|
||||
# Report the time elapsed since the realm's recorded start_time to Prometheus.
|
||||
realm_data = yield From(self._etcd_client.read(self._etcd_realm_key(build_component.builder_realm)))
|
||||
start_time = json.loads(realm_data.value)['start_time']
|
||||
metric_queue.builder_time_to_build(time.time() - start_time, labelvalues=[realm_data.executor_name])
|
||||
except (KeyError, etcd.EtcdKeyError):
|
||||
logger.warning('Could not read realm key %s', build_component.builder_realm)
|
||||
|
||||
try:
|
||||
# Clean up the bookkeeping for allowing any manager to take the job.
|
||||
|
@ -556,8 +567,16 @@ class EphemeralBuilderManager(BaseManager):
|
|||
|
||||
self.job_complete_callback(build_job, job_status, executor_name)
|
||||
|
||||
# Kill the ephmeral builder.
|
||||
# Kill the ephemeral builder.
|
||||
yield From(self.kill_builder_executor(build_job.build_uuid))
|
||||
|
||||
try:
|
||||
# log build time to prometheus
|
||||
realm_data = yield From(self._etcd_client.read(self._etcd_realm_key(build_component.builder_realm)))
|
||||
start_time = json.loads(realm_data.value)['start_time']
|
||||
metric_queue.build_time(time.time() - start_time, labelvalues=[realm_data.executor_name])
|
||||
except (KeyError, etcd.EtcdKeyError):
|
||||
logger.warning('Could not read realm key %s', build_component.builder_realm)
|
||||
|
||||
# Delete the build job from etcd.
|
||||
job_key = self._etcd_job_key(build_job)
|
||||
|
|
Reference in a new issue