Add an ImageTree class and change to searching *all applicable* branches when looking for the best cache tag.

This commit is contained in:
Joseph Schorr 2015-02-10 21:46:58 -05:00
parent 98b4f62ef7
commit 893ae46dec
4 changed files with 228 additions and 20 deletions

View file

@ -4,6 +4,7 @@ import logging
from cachetools import lru_cache from cachetools import lru_cache
from endpoints.notificationhelper import spawn_notification from endpoints.notificationhelper import spawn_notification
from data import model from data import model
from util.imagetree import ImageTree
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -91,31 +92,31 @@ class BuildJob(object):
repo_namespace = repo_build.repository.namespace_user.username repo_namespace = repo_build.repository.namespace_user.username
repo_name = repo_build.repository.name repo_name = repo_build.repository.name
current_image = model.get_image(repo_build.repository, base_image_id) base_image = model.get_image(repo_build.repository, base_image_id)
next_image = None if base_image is None:
if current_image is None:
return None return None
# For each cache comment, find a child image that matches the command. # Build an in-memory tree of the full hierarchy of images in the repository.
for cache_command in cache_commands: all_images = model.get_repository_images(repo_namespace, repo_name)
full_command = '["/bin/sh", "-c", "%s"]' % cache_command all_tags = model.list_repository_tags(repo_namespace, repo_name)
next_image = model.find_child_image(repo_build.repository, current_image, full_command) tree = ImageTree(all_images, all_tags, base_filter=base_image.id)
if next_image is None:
break
current_image = next_image # Find a path in the tree, starting at the base image, that matches the cache comments
logger.debug('Found cached image %s for comment %s', current_image.id, full_command) # or some subset thereof.
def checker(step, image):
if step >= len(cache_commands):
return False
# Find a tag associated with the image, if any. full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step]
# TODO(jschorr): We should just return the image ID instead of a parent tag, OR we should return image.storage.comment == full_command
# make this more efficient.
for tag in model.list_repository_tags(repo_namespace, repo_name): path = tree.find_longest_path(base_image.id, checker)
tag_image = tag.image if not path:
ancestor_index = '/%s/' % current_image.id return None
if ancestor_index in tag_image.ancestors or tag_image.id == current_image.id:
return tag.name # Find any tag associated with the last image in the path.
return tree.tag_containing_images(path[-1])
return None
def _determine_cached_tag_by_tag(self): def _determine_cached_tag_by_tag(self):
""" Determines the cached tag by looking for one of the tags being built, and seeing if it """ Determines the cached tag by looking for one of the tags being built, and seeing if it

View file

@ -7,6 +7,7 @@ class StatusHandler(object):
def __init__(self, build_logs, repository_build_uuid): def __init__(self, build_logs, repository_build_uuid):
self._current_phase = None self._current_phase = None
self._current_command = None
self._uuid = repository_build_uuid self._uuid = repository_build_uuid
self._build_logs = build_logs self._build_logs = build_logs
@ -26,9 +27,16 @@ class StatusHandler(object):
self._build_logs.append_log_message(self._uuid, log_message, log_type, log_data) self._build_logs.append_log_message(self._uuid, log_message, log_type, log_data)
def append_log(self, log_message, extra_data=None): def append_log(self, log_message, extra_data=None):
if log_message is None:
return
self._append_log_message(log_message, log_data=extra_data) self._append_log_message(log_message, log_data=extra_data)
def set_command(self, command, extra_data=None): def set_command(self, command, extra_data=None):
if self._current_command == command:
return
self._current_command = command
self._append_log_message(command, self._build_logs.COMMAND, extra_data) self._append_log_message(command, self._build_logs.COMMAND, extra_data)
def set_error(self, error_message, extra_data=None, internal_error=False): def set_error(self, error_message, extra_data=None, internal_error=False):

96
test/test_imagetree.py Normal file
View file

@ -0,0 +1,96 @@
import unittest
from app import app
from util.imagetree import ImageTree
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model
NAMESPACE = 'devtable'
SIMPLE_REPO = 'simple'
COMPLEX_REPO = 'complex'
class TestImageTree(unittest.TestCase):
  """ Exercises ImageTree path-finding and tag lookup against the fixture repositories. """

  def setUp(self):
    setup_database_for_testing(self)
    self.app = app.test_client()
    self.ctx = app.test_request_context()
    self.ctx.__enter__()

  def tearDown(self):
    finished_database_for_testing(self)
    self.ctx.__exit__(True, None, None)

  def _get_base_image(self, all_images):
    """ Returns the first image whose ancestors string is '/' (i.e. it has no parent),
        or None if there is no such image.
    """
    for image in all_images:
      if image.ancestors == '/':
        return image

    return None

  def _load_tree(self, repo_name, **tree_kwargs):
    """ Loads the images and tags of the given repository under NAMESPACE and builds an
        ImageTree from them. Any extra keyword arguments are forwarded to ImageTree.

        Returns a tuple of (tree, base_image, all_tags).
    """
    all_images = list(model.get_repository_images(NAMESPACE, repo_name))
    all_tags = list(model.list_repository_tags(NAMESPACE, repo_name))
    tree = ImageTree(all_images, all_tags, **tree_kwargs)
    return tree, self._get_base_image(all_images), all_tags

  @staticmethod
  def _accept_all(index, image):
    """ Checker that accepts every image, so the search yields the deepest branch. """
    return True

  def test_longest_path_simple_repo(self):
    tree, base_image, all_tags = self._load_tree(SIMPLE_REPO)
    tag_image = all_tags[0].image

    ancestors = tag_image.ancestors.split('/')[2:-1]  # Skip the first image.
    result = tree.find_longest_path(base_image.id, self._accept_all)
    self.assertEqual(3, len(result))
    for index in range(0, 2):
      self.assertEqual(int(ancestors[index]), result[index].id)

    self.assertEqual('latest', tree.tag_containing_image(result[-1]))

  def test_longest_path_complex_repo(self):
    tree, base_image, _ = self._load_tree(COMPLEX_REPO)

    result = tree.find_longest_path(base_image.id, self._accept_all)
    self.assertEqual(4, len(result))
    self.assertEqual('v2.0', tree.tag_containing_image(result[-1]))

  def test_filtering(self):
    # NOTE: 1245 is presumably an image id that does not exist in the fixtures, so the
    # filter removes every image (including the base) and no path can be found.
    # The keyword must be `base_filter`; ImageTree accepts no `parent_filter` argument.
    tree, base_image, _ = self._load_tree(COMPLEX_REPO, base_filter=1245)

    result = tree.find_longest_path(base_image.id, self._accept_all)
    self.assertEqual(0, len(result))

  def test_find_tag_parent_image(self):
    tree, base_image, _ = self._load_tree(COMPLEX_REPO)

    result = tree.find_longest_path(base_image.id, self._accept_all)
    self.assertEqual(4, len(result))

    # Only use the first image in the path. It doesn't have a tag itself, but the
    # method should still return the tag that contains it.
    self.assertEqual('v2.0', tree.tag_containing_image(result[0]))
if __name__ == '__main__':
unittest.main()

103
util/imagetree.py Normal file
View file

@ -0,0 +1,103 @@
class ImageTreeNode(object):
  """ A single image in the image tree, linked to its parent node, its child nodes
      and the names of any tags attached to it.
  """

  def __init__(self, image):
    self.tags = []
    self.children = []
    self.parent = None
    self.image = image

  def add_child(self, child):
    """ Registers `child` as a child of this node and points its parent back here. """
    child.parent = self
    self.children.append(child)

  def add_tag(self, tag):
    """ Records `tag` on this node. """
    self.tags.append(tag)
class ImageTree(object):
  """ In-memory tree for easy traversal and lookup of images in a repository.

      Images are expected to expose `id` and an `ancestors` string of the form
      '/<id>/<id>/.../' ('/' for an image with no parent); tags are expected to expose
      `name` and `image`.
  """

  def __init__(self, all_images, all_tags, base_filter=None):
    """ Builds the tree from the given images and tags. If `base_filter` is not None,
        only the image with that id and the images having it as an ancestor are kept.
    """
    self._tag_map = {}    # tag name -> node of the image the tag points at
    self._image_map = {}  # image id -> node
    self._build(all_images, all_tags, base_filter)

  def _build(self, all_images, all_tags, base_filter=None):
    # Build nodes for each of the images, skipping those excluded by the filter.
    for image in all_images:
      if base_filter is not None and image.id != base_filter:
        ancestors = image.ancestors.split('/')[1:-1]
        if str(base_filter) not in ancestors:
          continue

      self._image_map[image.id] = ImageTreeNode(image)

    # Connect the nodes to their parents. The direct parent is the last id in the
    # ancestors string; for '/' that element is empty and the node stays a root.
    for image_node in self._image_map.values():
      ancestors = image_node.image.ancestors
      parent_image_id = ancestors.split('/')[-2] if ancestors else None
      if not parent_image_id:
        continue

      # The parent may be missing from the map when it was filtered out above.
      parent_node = self._image_map.get(int(parent_image_id))
      if parent_node is not None:
        parent_node.add_child(image_node)

    # Build the tag map. Tags pointing at images outside the tree are ignored.
    for tag in all_tags:
      image_node = self._image_map.get(tag.image.id)
      if image_node is None:
        continue

      # Key the map by tag name; assigning the node directly would replace the dict.
      self._tag_map[tag.name] = image_node
      image_node.add_tag(tag.name)

  def find_longest_path(self, image_id, checker):
    """ Returns a list of images representing the longest path that matches the given
        checker function, starting from the given image_id *exclusive*.

        `checker(step, image)` is called for each candidate child image, with `step`
        being 0 for direct children of the starting image, 1 for their children, and
        so on; a branch is only followed while it returns a truthy value. Returns an
        empty list if the starting image is not in the tree or no child matches.
    """
    start_node = self._image_map.get(image_id)
    if start_node is None:
      return []

    # Drop the first element, which is the starting image itself.
    return self._find_longest_path(start_node, checker, -1)[1:]

  def _find_longest_path(self, image_node, checker, index):
    """ Returns the image of `image_node` followed by the longest matching chain of
        descendants. On ties, the branch encountered first wins.
    """
    found_path = []

    for child_node in image_node.children:
      if not checker(index + 1, child_node.image):
        continue

      found = self._find_longest_path(child_node, checker, index + 1)
      if len(found) > len(found_path):
        found_path = found

    return [image_node.image] + found_path

  def tag_containing_image(self, image):
    """ Returns the name of the closest tag containing the given image.

        The image's own first tag is preferred; otherwise its descendants are searched
        depth-first. Returns None if the image is falsy, is not in the tree, or no tag
        is found.
    """
    if not image:
      return None

    # Check the current image for a tag.
    image_node = self._image_map.get(image.id)
    if image_node is None:
      return None

    if image_node.tags:
      return image_node.tags[0]

    # Check any deriving images for a tag.
    for child_node in image_node.children:
      found = self.tag_containing_image(child_node.image)
      if found is not None:
        return found

    return None

  # Alias kept because the build job's cache-tag lookup in this change calls the
  # plural name; without it that call raises AttributeError.
  tag_containing_images = tag_containing_image