- Have the heartbeat fail to update if the worker has timed out
- Add additional build component logging for tracking down problems in the future
This commit is contained in:
parent
dfc2df1885
commit
5589bfc6d5
1 changed file with 15 additions and 0 deletions
|
@ -55,6 +55,7 @@ class BuildComponent(BaseComponent):
|
|||
def onConnect(self):
    """ Joins the realm named by this component's builder_realm attribute. """
    realm = self.builder_realm
    self.join(realm)
|
||||
|
||||
@trollius.coroutine
|
||||
def onJoin(self, details):
|
||||
logger.debug('Registering methods and listeners for component %s', self.builder_realm)
|
||||
yield trollius.From(self.register(self._on_ready, u'io.quay.buildworker.ready'))
|
||||
|
@ -277,6 +278,9 @@ class BuildComponent(BaseComponent):
|
|||
# Send the notification that the build has completed successfully.
|
||||
self._current_job.send_notification('build_success', image_id=kwargs.get('image_id'))
|
||||
except ApplicationError as aex:
|
||||
build_id = self._current_job.repo_build.uuid
|
||||
logger.exception('Got remote exception for build: %s', build_id)
|
||||
|
||||
worker_error = WorkerError(aex.error, aex.kwargs.get('base_error'))
|
||||
|
||||
# Write the error to the log.
|
||||
|
@ -310,6 +314,7 @@ class BuildComponent(BaseComponent):
|
|||
|
||||
@trollius.coroutine
|
||||
def _on_ready(self, token, version):
|
||||
logger.debug('On ready called (token "%s")', token)
|
||||
self._worker_version = version
|
||||
|
||||
if not version in SUPPORTED_WORKER_VERSIONS:
|
||||
|
@ -343,6 +348,10 @@ class BuildComponent(BaseComponent):
|
|||
|
||||
def _on_heartbeat(self):
|
||||
""" Updates the last known heartbeat. """
|
||||
if not self._current_job or self._component_status == ComponentStatus.TIMED_OUT:
|
||||
return
|
||||
|
||||
logger.debug('Got heartbeat for build %s', self._current_job.repo_build.uuid)
|
||||
self._last_heartbeat = datetime.datetime.utcnow()
|
||||
|
||||
@trollius.coroutine
|
||||
|
@ -374,9 +383,15 @@ class BuildComponent(BaseComponent):
|
|||
logger.debug('Checking heartbeat on realm %s', self.builder_realm)
|
||||
if (self._last_heartbeat and
|
||||
self._last_heartbeat < datetime.datetime.utcnow() - HEARTBEAT_DELTA):
|
||||
logger.debug('Heartbeat on realm %s has expired: %s', self.builder_realm,
|
||||
self._last_heartbeat)
|
||||
|
||||
yield trollius.From(self._timeout())
|
||||
raise trollius.Return()
|
||||
|
||||
logger.debug('Heartbeat on realm %s is valid: %s.', self.builder_realm,
|
||||
self._last_heartbeat)
|
||||
|
||||
yield trollius.From(trollius.sleep(HEARTBEAT_TIMEOUT))
|
||||
|
||||
@trollius.coroutine
|
||||
|
|
Reference in a new issue