diff --git a/workers/dockerfilebuild.py b/workers/dockerfilebuild.py
index a4de1cc47..9d3e92ae7 100644
--- a/workers/dockerfilebuild.py
+++ b/workers/dockerfilebuild.py
@@ -41,12 +41,13 @@ def matches_system_error(status_str):
   """ Returns true if the given status string matches a known system error in the
       Docker builder.
   """
-  KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied']
+  KNOWN_MATCHES = ['lxc-start: invalid', 'lxc-start: failed to', 'lxc-start: Permission denied',
+                   'lxc-start: The container failed']
 
   for match in KNOWN_MATCHES:
-    # 4 because we might have a Unix control code at the start.
-    found = status_str.find(match[0:len(match) + 4])
-    if found >= 0 and found <= 4:
+    # 10 because we might have a Unix control code at the start.
+    found = status_str.find(match[0:len(match) + 10])
+    if found >= 0 and found <= 10:
       return True
 
   return False
@@ -613,6 +614,7 @@ class DockerfileBuildWorker(Worker):
 
     except WorkerUnhealthyException as exc:
       # Spawn a notification that the build has failed.
+      log_appender('Worker has become unhealthy. Will retry shortly.', build_logs.ERROR)
       spawn_failure(exc.message, event_data)
 
       # Raise the exception to the queue.
diff --git a/workers/worker.py b/workers/worker.py
index e7750c232..c29d10f41 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -135,8 +135,8 @@ class Worker(object):
 
         except WorkerUnhealthyException:
           logger.error('The worker has encountered an error and will not take new jobs. Job is being requeued.')
-          self._stop.set()
           self.mark_current_incomplete(restore_retry=True)
+          self._stop.set()
 
         finally:
           # Close the db handle periodically