Add more check conditions for unhealthy workers and make the messaging better.

This commit is contained in:
Joseph Schorr 2014-08-26 12:41:43 -04:00
parent 67905c277e
commit 510bbe7889
2 changed files with 7 additions and 5 deletions

View file

@@ -41,12 +41,13 @@ def matches_system_error(status_str):
""" Returns true if the given status string matches a known system error in the
Docker builder.
"""
KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied']
KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied',
'lxc-start: The container failed']
for match in KNOWN_MATCHES:
# 4 because we might have a Unix control code at the start.
found = status_str.find(match[0:len(match) + 4])
if found >= 0 and found <= 4:
# 10 because we might have a Unix control code at the start.
found = status_str.find(match[0:len(match) + 10])
if found >= 0 and found <= 10:
return True
return False
@@ -613,6 +614,7 @@ class DockerfileBuildWorker(Worker):
except WorkerUnhealthyException as exc:
# Spawn a notification that the build has failed.
log_appender('Worker has become unhealthy. Will retry shortly.', build_logs.ERROR)
spawn_failure(exc.message, event_data)
# Raise the exception to the queue.

View file

@@ -135,8 +135,8 @@ class Worker(object):
except WorkerUnhealthyException:
logger.error('The worker has encountered an error and will not take new jobs. Job is being requeued.')
self._stop.set()
self.mark_current_incomplete(restore_retry=True)
self._stop.set()
finally:
# Close the db handle periodically