Make build workers report that they are unhealthy when we get an LXC error or a Docker connection issue
This commit is contained in:
parent
b12d63ce9a
commit
7e935f5a8c
2 changed files with 57 additions and 14 deletions
|
@ -96,6 +96,14 @@ class Worker(object):
|
|||
if self.current_queue_item is not None:
|
||||
self._queue.extend_processing(self.current_queue_item, seconds_from_now)
|
||||
|
||||
def run_watchdog(self):
    """Run a single watchdog check and halt the worker if it is unhealthy.

    Calls ``self.watchdog()``. When that call raises
    ``WorkerUnhealthyException`` the failure is logged and ``self._stop``
    is set; any other exception propagates to the caller unchanged.
    """
    logger.debug('Running watchdog.')

    unhealthy = False
    try:
        self.watchdog()
    except WorkerUnhealthyException:
        unhealthy = True

    if unhealthy:
        logger.error('The worker has encountered an error and will not take new jobs.')
        self._stop.set()
|
||||
|
||||
def poll_queue(self):
|
||||
logger.debug('Getting work item from queue.')
|
||||
|
||||
|
@ -112,7 +120,7 @@ class Worker(object):
|
|||
logger.warning('An error occurred processing request: %s', self.current_queue_item.body)
|
||||
self._queue.incomplete(self.current_queue_item)
|
||||
except WorkerUnhealthyException:
|
||||
logger.error('The worker has encountered an error and will not take new jobs.')
|
||||
logger.error('The worker has encountered an error and will not take new jobs. Job is being requeued.')
|
||||
self._stop.set()
|
||||
self._queue.incomplete(self.current_queue_item, restore_retry=True)
|
||||
finally:
|
||||
|
@ -147,7 +155,7 @@ class Worker(object):
|
|||
self._sched.add_interval_job(self.poll_queue, seconds=self._poll_period_seconds,
|
||||
start_date=soon)
|
||||
self._sched.add_interval_job(self.update_queue_metrics, seconds=60, start_date=soon)
|
||||
self._sched.add_interval_job(self.watchdog, seconds=self._watchdog_period_seconds)
|
||||
self._sched.add_interval_job(self.run_watchdog, seconds=self._watchdog_period_seconds)
|
||||
|
||||
signal.signal(signal.SIGTERM, self.terminate)
|
||||
signal.signal(signal.SIGINT, self.terminate)
|
||||
|
|
Reference in a new issue