Add more check conditions for unhealthy workers and make the messaging better.
This commit is contained in:
parent
67905c277e
commit
510bbe7889
2 changed files with 7 additions and 5 deletions
|
@@ -41,12 +41,13 @@ def matches_system_error(status_str):
|
|||
""" Returns true if the given status string matches a known system error in the
|
||||
Docker builder.
|
||||
"""
|
||||
KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied']
|
||||
KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied',
|
||||
'lxc-start: The container failed']
|
||||
|
||||
for match in KNOWN_MATCHES:
|
||||
# 4 because we might have a Unix control code at the start.
|
||||
found = status_str.find(match[0:len(match) + 4])
|
||||
if found >= 0 and found <= 4:
|
||||
# 10 because we might have a Unix control code at the start.
|
||||
found = status_str.find(match[0:len(match) + 10])
|
||||
if found >= 0 and found <= 10:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
@@ -613,6 +614,7 @@ class DockerfileBuildWorker(Worker):
|
|||
|
||||
except WorkerUnhealthyException as exc:
|
||||
# Spawn a notification that the build has failed.
|
||||
log_appender('Worker has become unhealthy. Will retry shortly.', build_logs.ERROR)
|
||||
spawn_failure(exc.message, event_data)
|
||||
|
||||
# Raise the exception to the queue.
|
||||
|
|
|
@@ -135,8 +135,8 @@ class Worker(object):
|
|||
|
||||
except WorkerUnhealthyException:
|
||||
logger.error('The worker has encountered an error and will not take new jobs. Job is being requeued.')
|
||||
self._stop.set()
|
||||
self.mark_current_incomplete(restore_retry=True)
|
||||
self._stop.set()
|
||||
|
||||
finally:
|
||||
# Close the db handle periodically
|
||||
|
|
Reference in a new issue