- Have the heartbeat fail to update if the worker has timed out

- Add additional build component logging for tracking down problems in the future
This commit is contained in:
Joseph Schorr 2015-05-22 15:24:14 -04:00
parent dfc2df1885
commit 5589bfc6d5

View file

@ -55,6 +55,7 @@ class BuildComponent(BaseComponent):
def onConnect(self):
    """ Joins the realm named by this component's `builder_realm`. """
    # NOTE(review): presumably called by the session framework once the
    # transport is connected -- confirm against BaseComponent.
    realm = self.builder_realm
    self.join(realm)
@trollius.coroutine
def onJoin(self, details):
logger.debug('Registering methods and listeners for component %s', self.builder_realm)
yield trollius.From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
@ -277,6 +278,9 @@ class BuildComponent(BaseComponent):
# Send the notification that the build has completed successfully.
self._current_job.send_notification('build_success', image_id=kwargs.get('image_id'))
except ApplicationError as aex:
build_id = self._current_job.repo_build.uuid
logger.exception('Got remote exception for build: %s', build_id)
worker_error = WorkerError(aex.error, aex.kwargs.get('base_error'))
# Write the error to the log.
@ -310,6 +314,7 @@ class BuildComponent(BaseComponent):
@trollius.coroutine
def _on_ready(self, token, version):
logger.debug('On ready called (token "%s")', token)
self._worker_version = version
if not version in SUPPORTED_WORKER_VERSIONS:
@ -343,6 +348,10 @@ class BuildComponent(BaseComponent):
def _on_heartbeat(self):
    """ Records the current UTC time as the last known heartbeat.

        The heartbeat is ignored (nothing is logged or updated) when there is
        no current job or when the component status is TIMED_OUT.
    """
    active_job = self._current_job
    if not active_job:
        # No build is running, so there is nothing to keep alive.
        return

    if self._component_status == ComponentStatus.TIMED_OUT:
        # A timed-out worker must not be able to refresh its heartbeat.
        return

    build_uuid = active_job.repo_build.uuid
    logger.debug('Got heartbeat for build %s', build_uuid)
    self._last_heartbeat = datetime.datetime.utcnow()
@trollius.coroutine
@ -374,9 +383,15 @@ class BuildComponent(BaseComponent):
logger.debug('Checking heartbeat on realm %s', self.builder_realm)
if (self._last_heartbeat and
self._last_heartbeat < datetime.datetime.utcnow() - HEARTBEAT_DELTA):
logger.debug('Heartbeat on realm %s has expired: %s', self.builder_realm,
self._last_heartbeat)
yield trollius.From(self._timeout())
raise trollius.Return()
logger.debug('Heartbeat on realm %s is valid: %s.', self.builder_realm,
self._last_heartbeat)
yield trollius.From(trollius.sleep(HEARTBEAT_TIMEOUT))
@trollius.coroutine