Add an ImageTree class and change to searching *all applicable* branches when looking for the best cache tag.

This commit is contained in:
Joseph Schorr 2015-02-10 21:46:58 -05:00
parent 98b4f62ef7
commit 893ae46dec
4 changed files with 228 additions and 20 deletions

View file

@ -4,6 +4,7 @@ import logging
from cachetools import lru_cache
from endpoints.notificationhelper import spawn_notification
from data import model
from util.imagetree import ImageTree
logger = logging.getLogger(__name__)
@ -91,31 +92,31 @@ class BuildJob(object):
repo_namespace = repo_build.repository.namespace_user.username
repo_name = repo_build.repository.name
current_image = model.get_image(repo_build.repository, base_image_id)
next_image = None
if current_image is None:
base_image = model.get_image(repo_build.repository, base_image_id)
if base_image is None:
return None
# For each cache comment, find a child image that matches the command.
for cache_command in cache_commands:
full_command = '["/bin/sh", "-c", "%s"]' % cache_command
next_image = model.find_child_image(repo_build.repository, current_image, full_command)
if next_image is None:
break
# Build an in-memory tree of the full heirarchy of images in the repository.
all_images = model.get_repository_images(repo_namespace, repo_name)
all_tags = model.list_repository_tags(repo_namespace, repo_name)
tree = ImageTree(all_images, all_tags, base_filter=base_image.id)
current_image = next_image
logger.debug('Found cached image %s for comment %s', current_image.id, full_command)
# Find a path in the tree, starting at the base image, that matches the cache comments
# or some subset thereof.
def checker(step, image):
if step >= len(cache_commands):
return False
# Find a tag associated with the image, if any.
# TODO(jschorr): We should just return the image ID instead of a parent tag, OR we should
# make this more efficient.
for tag in model.list_repository_tags(repo_namespace, repo_name):
tag_image = tag.image
ancestor_index = '/%s/' % current_image.id
if ancestor_index in tag_image.ancestors or tag_image.id == current_image.id:
return tag.name
full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step]
return image.storage.comment == full_command
path = tree.find_longest_path(base_image.id, checker)
if not path:
return None
# Find any tag associated with the last image in the path.
return tree.tag_containing_images(path[-1])
return None
def _determine_cached_tag_by_tag(self):
""" Determines the cached tag by looking for one of the tags being built, and seeing if it

View file

@ -7,6 +7,7 @@ class StatusHandler(object):
def __init__(self, build_logs, repository_build_uuid):
self._current_phase = None
self._current_command = None
self._uuid = repository_build_uuid
self._build_logs = build_logs
@ -26,9 +27,16 @@ class StatusHandler(object):
self._build_logs.append_log_message(self._uuid, log_message, log_type, log_data)
def append_log(self, log_message, extra_data=None):
if log_message is None:
return
self._append_log_message(log_message, log_data=extra_data)
def set_command(self, command, extra_data=None):
if self._current_command == command:
return
self._current_command = command
self._append_log_message(command, self._build_logs.COMMAND, extra_data)
def set_error(self, error_message, extra_data=None, internal_error=False):

96
test/test_imagetree.py Normal file
View file

@ -0,0 +1,96 @@
import unittest
from app import app
from util.imagetree import ImageTree
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model
NAMESPACE = 'devtable'
SIMPLE_REPO = 'simple'
COMPLEX_REPO = 'complex'
class TestImageTree(unittest.TestCase):
def setUp(self):
setup_database_for_testing(self)
self.app = app.test_client()
self.ctx = app.test_request_context()
self.ctx.__enter__()
def tearDown(self):
finished_database_for_testing(self)
self.ctx.__exit__(True, None, None)
def _get_base_image(self, all_images):
for image in all_images:
if image.ancestors == '/':
return image
return None
def test_longest_path_simple_repo(self):
all_images = list(model.get_repository_images(NAMESPACE, SIMPLE_REPO))
all_tags = list(model.list_repository_tags(NAMESPACE, SIMPLE_REPO))
tree = ImageTree(all_images, all_tags)
base_image = self._get_base_image(all_images)
tag_image = all_tags[0].image
def checker(index, image):
return True
ancestors = tag_image.ancestors.split('/')[2:-1] # Skip the first image.
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(3, len(result))
for index in range(0, 2):
self.assertEquals(int(ancestors[index]), result[index].id)
self.assertEquals('latest', tree.tag_containing_image(result[-1]))
def test_longest_path_complex_repo(self):
all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO))
all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO))
tree = ImageTree(all_images, all_tags)
base_image = self._get_base_image(all_images)
def checker(index, image):
return True
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(4, len(result))
self.assertEquals('v2.0', tree.tag_containing_image(result[-1]))
def test_filtering(self):
all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO))
all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO))
tree = ImageTree(all_images, all_tags, parent_filter=1245)
base_image = self._get_base_image(all_images)
def checker(index, image):
return True
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(0, len(result))
def test_find_tag_parent_image(self):
all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO))
all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO))
tree = ImageTree(all_images, all_tags)
base_image = self._get_base_image(all_images)
def checker(index, image):
return True
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(4, len(result))
# Only use the first two images. They don't have tags, but the method should
# still return the tag that contains them.
self.assertEquals('v2.0', tree.tag_containing_image(result[0]))
if __name__ == '__main__':
unittest.main()

103
util/imagetree.py Normal file
View file

@ -0,0 +1,103 @@
class ImageTreeNode(object):
""" A node in the image tree. """
def __init__(self, image):
self.image = image
self.parent = None
self.children = []
self.tags = []
def add_child(self, child):
self.children.append(child)
child.parent = self
def add_tag(self, tag):
self.tags.append(tag)
class ImageTree(object):
""" In-memory tree for easy traversal and lookup of images in a repository. """
def __init__(self, all_images, all_tags, base_filter=None):
self._tag_map = {}
self._image_map = {}
self._build(all_images, all_tags, base_filter)
def _build(self, all_images, all_tags, base_filter=None):
# Build nodes for each of the images.
for image in all_images:
ancestors = image.ancestors.split('/')[1:-1]
# Filter any unneeded images.
if base_filter is not None:
if image.id != base_filter and not str(base_filter) in ancestors:
continue
self._image_map[image.id] = ImageTreeNode(image)
# Connect the nodes to their parents.
for image_node in self._image_map.values():
image = image_node.image
parent_image_id = image.ancestors.split('/')[-2] if image.ancestors else None
if not parent_image_id:
continue
parent_node = self._image_map.get(int(parent_image_id))
if parent_node is not None:
parent_node.add_child(image_node)
# Build the tag map.
for tag in all_tags:
image_node = self._image_map.get(tag.image.id)
if not image_node:
continue
self._tag_map = image_node
image_node.add_tag(tag.name)
def find_longest_path(self, image_id, checker):
""" Returns a list of images representing the longest path that matches the given
checker function, starting from the given image_id *exclusive*.
"""
start_node = self._image_map.get(image_id)
if not start_node:
return []
return self._find_longest_path(start_node, checker, -1)[1:]
def _find_longest_path(self, image_node, checker, index):
found_path = []
for child_node in image_node.children:
if not checker(index + 1, child_node.image):
continue
found = self._find_longest_path(child_node, checker, index + 1)
if found and len(found) > len(found_path):
found_path = found
return [image_node.image] + found_path
def tag_containing_image(self, image):
""" Returns the name of the closest tag containing the given image. """
if not image:
return None
# Check the current image for a tag.
image_node = self._image_map.get(image.id)
if image_node is None:
return None
if image_node.tags:
return image_node.tags[0]
# Check any deriving images for a tag.
for child_node in image_node.children:
found = self.tag_containing_image(child_node.image)
if found is not None:
return found
return None