Add a watchdog timer to the build worker to kill a build step that takes more than 20 minutes.
This commit is contained in:
parent
204fecc1f9
commit
b95d3ec329
2 changed files with 38 additions and 5 deletions
|
@ -12,6 +12,8 @@ from docker import Client, APIError
|
|||
from tempfile import TemporaryFile, mkdtemp
|
||||
from zipfile import ZipFile
|
||||
from functools import partial
|
||||
from datetime import datetime, timedelta
|
||||
from threading import Event
|
||||
|
||||
from data.queue import dockerfile_build_queue
|
||||
from data import model
|
||||
|
@ -31,6 +33,8 @@ logger = logging.getLogger(__name__)
|
|||
user_files = app.config['USERFILES']
|
||||
build_logs = app.config['BUILDLOGS']
|
||||
|
||||
TIMEOUT_PERIOD_MINUTES = 20
|
||||
|
||||
|
||||
class StatusWrapper(object):
|
||||
def __init__(self, build_uuid):
|
||||
|
@ -316,6 +320,8 @@ class DockerfileBuildWorker(Worker):
|
|||
'application/gzip': DockerfileBuildWorker.__prepare_tarball,
|
||||
}
|
||||
|
||||
self._timeout = Event()
|
||||
|
||||
@staticmethod
|
||||
def __prepare_zip(request_file):
|
||||
build_dir = mkdtemp(prefix='docker-build-')
|
||||
|
@ -347,7 +353,24 @@ class DockerfileBuildWorker(Worker):
|
|||
|
||||
return build_dir
|
||||
|
||||
def watchdog(self):
|
||||
logger.debug('Running build watchdog code.')
|
||||
|
||||
docker_cl = Client()
|
||||
|
||||
# Iterate the running containers and kill ones that have been running more than 20 minutes
|
||||
for container in docker_cl.containers():
|
||||
start_time = datetime.fromtimestamp(container[u'Created'])
|
||||
running_time = datetime.now() - start_time
|
||||
if running_time > timedelta(minutes=TIMEOUT_PERIOD_MINUTES):
|
||||
logger.warning('Container has been running too long: %s with command: %s',
|
||||
container[u'Id'], container[u'Command'])
|
||||
docker_cl.kill(container[u'Id'])
|
||||
self._timeout.set()
|
||||
|
||||
def process_queue_item(self, job_details):
|
||||
self._timeout.clear()
|
||||
|
||||
repository_build = model.get_repository_build(job_details['namespace'],
|
||||
job_details['repository'],
|
||||
job_details['build_uuid'])
|
||||
|
@ -370,7 +393,7 @@ class DockerfileBuildWorker(Worker):
|
|||
|
||||
start_msg = ('Starting job with resource url: %s repo: %s' % (resource_url,
|
||||
repo))
|
||||
log_appender(start_msg)
|
||||
logger.debug(start_msg)
|
||||
|
||||
docker_resource = requests.get(resource_url, stream=True)
|
||||
c_type = docker_resource.headers['content-type']
|
||||
|
@ -402,7 +425,11 @@ class DockerfileBuildWorker(Worker):
|
|||
log_appender('error', build_logs.PHASE)
|
||||
repository_build.phase = 'error'
|
||||
repository_build.save()
|
||||
log_appender('Unable to build dockerfile.', build_logs.ERROR)
|
||||
if self._timeout.is_set():
|
||||
log_appender('Build step was terminated after %s minutes.' % TIMEOUT_PERIOD_MINUTES,
|
||||
build_logs.ERROR)
|
||||
else:
|
||||
log_appender('Unable to build dockerfile.', build_logs.ERROR)
|
||||
return True
|
||||
|
||||
log_appender('pushing', build_logs.PHASE)
|
||||
|
|
Reference in a new issue