From de18236358c46a0c813e64cc7c5c52aee85da8f0 Mon Sep 17 00:00:00 2001
From: jakedt
Date: Mon, 14 Apr 2014 15:21:05 -0400
Subject: [PATCH] Allow for caching of previous docker builds for 24 hours.

---
 workers/dockerfilebuild.py | 96 ++++++++++++++++++++++++++------------
 1 file changed, 66 insertions(+), 30 deletions(-)

diff --git a/workers/dockerfilebuild.py b/workers/dockerfilebuild.py
index 73c491d36..b357cd008 100644
--- a/workers/dockerfilebuild.py
+++ b/workers/dockerfilebuild.py
@@ -14,6 +14,7 @@ from zipfile import ZipFile
 from functools import partial
 from datetime import datetime, timedelta
 from threading import Event
+from uuid import uuid4
 
 from data.queue import dockerfile_build_queue
 from data import model
@@ -34,6 +35,7 @@ user_files = app.config['USERFILES']
 build_logs = app.config['BUILDLOGS']
 
 TIMEOUT_PERIOD_MINUTES = 20
+CACHE_EXPIRATION_PERIOD_HOURS = 24
 
 
 class StatusWrapper(object):
@@ -95,6 +97,9 @@ class StreamingDockerClient(Client):
 
 
 class DockerfileBuildContext(object):
+  image_id_to_cache_time = {}
+  public_repos = set()
+
   def __init__(self, build_context_dir, dockerfile_subdir, repo, tag_names,
                push_token, build_uuid, pull_credentials=None):
     self._build_dir = build_context_dir
@@ -120,6 +125,7 @@ class DockerfileBuildContext(object):
                  (self._repo, self._tag_names))
 
   def __enter__(self):
+    self.__evict_expired_images()
     return self
 
   def __exit__(self, exc_type, value, traceback):
@@ -261,7 +267,19 @@ class DockerfileBuildContext(object):
 
       raise RuntimeError(message)
 
-  def __cleanup(self):
+  def __is_repo_public(self, repo_name):
+    if repo_name in self.public_repos:
+      return True
+
+    repo_url = 'https://index.docker.io/v1/repositories/%s/images' % repo_name
+    repo_info = requests.get(repo_url)
+    if repo_info.status_code / 100 == 2:
+      self.public_repos.add(repo_name)
+      return True
+    else:
+      return False
+
+  def __cleanup_containers(self):
     # First clean up any containers that might be holding the images
     for running in self._build_cl.containers(quiet=True):
       logger.debug('Killing container: %s' % running['Id'])
@@ -272,40 +290,58 @@ class DockerfileBuildContext(object):
       logger.debug('Removing container: %s' % container['Id'])
       self._build_cl.remove_container(container['Id'])
 
-    # Iterate all of the images and remove the ones that the public registry
-    # doesn't know about, this should preserve base images.
-    images_to_remove = set()
-    repos = set()
+  def __evict_expired_images(self):
+    self.__cleanup_containers()
+
+    logger.debug('Cleaning images older than %s hours.', CACHE_EXPIRATION_PERIOD_HOURS)
+    now = datetime.now()
+    verify_removed = set()
+
     for image in self._build_cl.images():
-      images_to_remove.add(image['Id'])
+      image_id = image[u'Id']
+      created = datetime.fromtimestamp(image[u'Created'])
+
+      # If we don't have a cache time, use the created time (e.g. worker reboot)
+      cache_time = self.image_id_to_cache_time.get(image_id, created)
+      expiration = cache_time + timedelta(hours=CACHE_EXPIRATION_PERIOD_HOURS)
+
+      if expiration < now:
+        logger.debug('Removing expired image: %s' % image_id)
+        verify_removed.add(image_id)
+        try:
+          self._build_cl.remove_image(image_id)
+        except APIError:
+          # Sometimes an upstream image removed this one
+          pass
+
+    # Verify that our images were actually removed
+    for image in self._build_cl.images():
+      if image['Id'] in verify_removed:
+        raise RuntimeError('Image was not removed: %s' % image['Id'])
+
+  def __cleanup(self):
+    self.__cleanup_containers()
+
+    # Iterate all of the images and rename the ones that aren't public. This should preserve
+    # base images and also allow the cache to function.
+    now = datetime.now()
+    for image in self._build_cl.images():
+      image_id = image[u'Id']
+
+      if image_id not in self.image_id_to_cache_time:
+        logger.debug('Setting image %s cache time to %s', image_id, now)
+        self.image_id_to_cache_time[image_id] = now
 
       for tag in image['RepoTags']:
        tag_repo = tag.split(':')[0]
         if tag_repo != '':
-          repos.add(tag_repo)
-
-    for repo in repos:
-      repo_url = 'https://index.docker.io/v1/repositories/%s/images' % repo
-      repo_info = requests.get(repo_url)
-      if repo_info.status_code / 100 == 2:
-        for repo_image in repo_info.json():
-          if repo_image['id'] in images_to_remove:
-            logger.debug('Image was deemed public: %s' % repo_image['id'])
-            images_to_remove.remove(repo_image['id'])
-
-    for to_remove in images_to_remove:
-      logger.debug('Removing private image: %s' % to_remove)
-      try:
-        self._build_cl.remove_image(to_remove)
-      except APIError:
-        # Sometimes an upstream image removed this one
-        pass
-
-    # Verify that our images were actually removed
-    for image in self._build_cl.images():
-      if image['Id'] in images_to_remove:
-        raise RuntimeError('Image was not removed: %s' % image['Id'])
-
+          if self.__is_repo_public(tag_repo):
+            logger.debug('Repo was deemed public: %s', tag_repo)
+          else:
+            new_name = str(uuid4())
+            logger.debug('Private repo tag being renamed %s -> %s', tag, new_name)
+            self._build_cl.tag(image_id, new_name)
+            self._build_cl.remove_image(tag)
 
 class DockerfileBuildWorker(Worker):
   def __init__(self, *vargs, **kwargs):
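
Reviewer note (not part of the patch): the snippet below is a minimal, standalone sketch of the expiration rule that __evict_expired_images applies on __enter__, assuming the same image_id_to_cache_time mapping and CACHE_EXPIRATION_PERIOD_HOURS constant the patch introduces. The expired_image_ids helper and the sample data are hypothetical, used only to show the fallback to an image's Created timestamp when no cache time has been recorded (e.g. after a worker reboot).

    from datetime import datetime, timedelta

    CACHE_EXPIRATION_PERIOD_HOURS = 24

    def expired_image_ids(images, image_id_to_cache_time, now=None):
      """Return the ids of images whose cache window has elapsed.

      `images` mirrors the dicts returned by docker-py's images() (only 'Id'
      and 'Created' are used); `image_id_to_cache_time` plays the role of the
      class-level dict the patch adds to DockerfileBuildContext.
      """
      now = now or datetime.now()
      expired = set()
      for image in images:
        created = datetime.fromtimestamp(image['Created'])
        # No recorded cache time (e.g. the worker rebooted): fall back to created.
        cache_time = image_id_to_cache_time.get(image['Id'], created)
        if cache_time + timedelta(hours=CACHE_EXPIRATION_PERIOD_HOURS) < now:
          expired.add(image['Id'])
      return expired

    if __name__ == '__main__':
      import time
      images = [
        {'Id': 'fresh-image', 'Created': int(time.time())},
        {'Id': 'stale-image', 'Created': int(time.time()) - 48 * 60 * 60},
      ]
      # With no recorded cache times, only the 48-hour-old image is expired.
      print(expired_image_ids(images, {}))

The same clock drives __cleanup: images seen at cleanup get their cache time stamped, so a private base layer that was renamed rather than deleted survives as cache until its 24-hour window lapses.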