From 1571b2867abc0c98761577aab5ede10c3be1cd99 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 16 Sep 2016 16:26:04 -0400 Subject: [PATCH] Add executor name to the build metric --- buildman/manager/ephemeral.py | 8 ++++++-- buildman/server.py | 8 ++++---- util/metrics/metricqueue.py | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/buildman/manager/ephemeral.py b/buildman/manager/ephemeral.py index c98cc7ef1..db63e81a3 100644 --- a/buildman/manager/ephemeral.py +++ b/buildman/manager/ephemeral.py @@ -182,6 +182,7 @@ class EphemeralBuilderManager(BaseManager): self._build_uuid_to_info.pop(build_job.build_uuid, None) raise Return() + executor_name = build_info.executor_name execution_id = build_info.execution_id # If we have not yet received a heartbeat, then the node failed to boot in some way. We mark @@ -194,7 +195,7 @@ class EphemeralBuilderManager(BaseManager): got_lock = yield From(self._take_etcd_atomic_lock('job-expired', build_job.build_uuid)) if got_lock: logger.warning('Marking job %s as incomplete', build_job.build_uuid) - self.job_complete_callback(build_job, BuildJobResult.INCOMPLETE) + self.job_complete_callback(build_job, BuildJobResult.INCOMPLETE, executor_name) # Finally, we terminate the build execution for the job. We don't do this under a lock as # terminating a node is an atomic operation; better to make sure it is terminated than not. @@ -540,7 +541,10 @@ class EphemeralBuilderManager(BaseManager): build_job.build_uuid, job_status) # Mark the job as completed. - self.job_complete_callback(build_job, job_status) + build_info = self._build_uuid_to_info.get(build_job.build_uuid, None) + executor_name = build_info.executor_name if build_info else None + + self.job_complete_callback(build_job, job_status, executor_name) # Kill the ephmeral builder. yield From(self.kill_builder_executor(build_job.build_uuid)) diff --git a/buildman/server.py b/buildman/server.py index 87ceb60f1..5fdf13751 100644 --- a/buildman/server.py +++ b/buildman/server.py @@ -141,7 +141,7 @@ class BuilderServer(object): self._queue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS, minimum_extension=MINIMUM_JOB_EXTENSION) - def _job_complete(self, build_job, job_status): + def _job_complete(self, build_job, job_status, executor_name=None): if job_status == BuildJobResult.INCOMPLETE: self._queue.incomplete(build_job.job_item, restore_retry=False, retry_after=30) else: @@ -152,7 +152,7 @@ class BuilderServer(object): if self._current_status == BuildServerStatus.SHUTDOWN and not self._job_count: self._shutdown_event.set() - _report_completion_status(build_job, job_status) + _report_completion_status(build_job, job_status, executor_name) @trollius.coroutine def _work_checker(self): @@ -229,10 +229,10 @@ class BuilderServer(object): # Initialize the work queue checker. yield From(self._work_checker()) -def _report_completion_status(build_job, status): +def _report_completion_status(build_job, status, executor_name): metric_queue.build_counter.Inc(labelvalues=[status]) metric_queue.repository_build_completed.Inc(labelvalues=[build_job.namespace, build_job.repo_name, - status]) + status, executor_name or 'executor']) if status == BuildJobResult.COMPLETE: status_name = 'CompleteBuilds' elif status == BuildJobResult.ERROR: diff --git a/util/metrics/metricqueue.py b/util/metrics/metricqueue.py index bcee73bce..a9c8a87ad 100644 --- a/util/metrics/metricqueue.py +++ b/util/metrics/metricqueue.py @@ -55,7 +55,7 @@ class MetricQueue(object): self.repository_build_completed = prom.create_counter('repository_build_completed', 'Repository Build Complete Count', labelnames=['namespace', 'repo_name', - 'status']) + 'status', 'executor']) self.repository_count = prom.create_gauge('repository_count', 'Number of repositories') self.user_count = prom.create_gauge('user_count', 'Number of users')