Add missing call to set_phase when a build doesn't start

This change fixes the build manager ephemeral executor to tell the overall build server to call set_phase when a build never starts. Before this change, we'd properly adjust the queue item, but not the repo build row or the logs, which is why users just saw "Preparing Build Node", with no indicating the node failed to start.

Fixes #1904
This commit is contained in:
Joseph Schorr 2016-09-30 14:54:49 +02:00
parent b4dd5ea4dd
commit f50bb8a1ce
5 changed files with 17 additions and 11 deletions

View file

@ -452,8 +452,8 @@ class BuildComponent(BaseComponent):
requeued=self._current_job.has_retries_remaining()) requeued=self._current_job.has_retries_remaining())
yield From(self.parent_manager.job_completed(self._current_job, yield From(self.parent_manager.job_completed(self._current_job,
BuildJobResult.INCOMPLETE, BuildJobResult.INCOMPLETE,
self)) self))
self._current_job = None self._current_job = None
# Unregister the current component so that it cannot be invoked again. # Unregister the current component so that it cannot be invoked again.

View file

@ -66,7 +66,6 @@ class StatusHandler(object):
repo_build = model.build.get_repository_build(self._uuid) repo_build = model.build.get_repository_build(self._uuid)
repo_build.phase = phase repo_build.phase = phase
repo_build.save() repo_build.save()
return True return True
def __enter__(self): def __enter__(self):

View file

@ -198,7 +198,8 @@ class EphemeralBuilderManager(BaseManager):
execution_id)) execution_id))
if got_lock: if got_lock:
logger.warning('Marking job %s as incomplete', build_job.build_uuid) logger.warning('Marking job %s as incomplete', build_job.build_uuid)
self.job_complete_callback(build_job, BuildJobResult.INCOMPLETE, executor_name) self.job_complete_callback(build_job, BuildJobResult.INCOMPLETE, executor_name,
update_phase=True)
# Finally, we terminate the build execution for the job. We don't do this under a lock as # Finally, we terminate the build execution for the job. We don't do this under a lock as
# terminating a node is an atomic operation; better to make sure it is terminated than not. # terminating a node is an atomic operation; better to make sure it is terminated than not.
@ -537,7 +538,7 @@ class EphemeralBuilderManager(BaseManager):
logger.debug('Sending build %s to newly ready component on realm %s', logger.debug('Sending build %s to newly ready component on realm %s',
job.build_uuid, build_component.builder_realm) job.build_uuid, build_component.builder_realm)
yield From(build_component.start_build(job)) yield From(build_component.start_build(job))
try: try:
# log start time to prometheus # log start time to prometheus
realm_data = yield From(self._etcd_client.read(self._etcd_realm_key(build_component.builder_realm))) realm_data = yield From(self._etcd_client.read(self._etcd_realm_key(build_component.builder_realm)))
@ -561,15 +562,15 @@ class EphemeralBuilderManager(BaseManager):
logger.debug('Calling job_completed for job %s with status: %s', logger.debug('Calling job_completed for job %s with status: %s',
build_job.build_uuid, job_status) build_job.build_uuid, job_status)
# Mark the job as completed. # Mark the job as completed. Since this is being invoked from the component, we don't need
# to ask for the phase to be updated as well.
build_info = self._build_uuid_to_info.get(build_job.build_uuid, None) build_info = self._build_uuid_to_info.get(build_job.build_uuid, None)
executor_name = build_info.executor_name if build_info else None executor_name = build_info.executor_name if build_info else None
self.job_complete_callback(build_job, job_status, executor_name, update_phase=False)
self.job_complete_callback(build_job, job_status, executor_name)
# Kill the ephemeral builder. # Kill the ephemeral builder.
yield From(self.kill_builder_executor(build_job.build_uuid)) yield From(self.kill_builder_executor(build_job.build_uuid))
try: try:
# log build time to prometheus # log build time to prometheus
realm_data = yield From(self._etcd_client.read(self._etcd_realm_key(build_component.builder_realm))) realm_data = yield From(self._etcd_client.read(self._etcd_realm_key(build_component.builder_realm)))

View file

@ -141,12 +141,16 @@ class BuilderServer(object):
self._queue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS, self._queue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS,
minimum_extension=MINIMUM_JOB_EXTENSION) minimum_extension=MINIMUM_JOB_EXTENSION)
def _job_complete(self, build_job, job_status, executor_name=None): def _job_complete(self, build_job, job_status, executor_name=None, update_phase=False):
if job_status == BuildJobResult.INCOMPLETE: if job_status == BuildJobResult.INCOMPLETE:
self._queue.incomplete(build_job.job_item, restore_retry=False, retry_after=30) self._queue.incomplete(build_job.job_item, restore_retry=False, retry_after=30)
else: else:
self._queue.complete(build_job.job_item) self._queue.complete(build_job.job_item)
if update_phase:
status_handler = StatusHandler(self._build_logs, build_job.repo_build.uuid)
status_handler.set_phase(job_status)
self._job_count = self._job_count - 1 self._job_count = self._job_count - 1
if self._current_status == BuildServerStatus.SHUTDOWN and not self._job_count: if self._current_status == BuildServerStatus.SHUTDOWN and not self._job_count:

View file

@ -384,8 +384,10 @@ class TestEphemeralLifecycle(EphemeralBuilderTestCase):
self.test_executor.stop_builder.assert_called_once_with('123') self.test_executor.stop_builder.assert_called_once_with('123')
self.assertEqual(self.test_executor.stop_builder.call_count, 1) self.assertEqual(self.test_executor.stop_builder.call_count, 1)
# Ensure the job was marked as incomplete, with an update_phase to True (so the DB record and
# logs are updated as well)
self.job_complete_callback.assert_called_once_with(ANY, BuildJobResult.INCOMPLETE, self.job_complete_callback.assert_called_once_with(ANY, BuildJobResult.INCOMPLETE,
'MockExecutor') 'MockExecutor', update_phase=True)
@async_test @async_test
def test_change_worker(self): def test_change_worker(self):