From 893ae46dec6b7114504fd41252890ddb461bf205 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 10 Feb 2015 21:46:58 -0500 Subject: [PATCH] Add an ImageTree class and change to searching *all applicable* branches when looking for the best cache tag. --- buildman/jobutil/buildjob.py | 41 ++++++------- buildman/jobutil/buildstatus.py | 8 +++ test/test_imagetree.py | 96 +++++++++++++++++++++++++++++ util/imagetree.py | 103 ++++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+), 20 deletions(-) create mode 100644 test/test_imagetree.py create mode 100644 util/imagetree.py diff --git a/buildman/jobutil/buildjob.py b/buildman/jobutil/buildjob.py index bcf2c33c2..46dab931c 100644 --- a/buildman/jobutil/buildjob.py +++ b/buildman/jobutil/buildjob.py @@ -4,6 +4,7 @@ import logging from cachetools import lru_cache from endpoints.notificationhelper import spawn_notification from data import model +from util.imagetree import ImageTree logger = logging.getLogger(__name__) @@ -91,31 +92,31 @@ class BuildJob(object): repo_namespace = repo_build.repository.namespace_user.username repo_name = repo_build.repository.name - current_image = model.get_image(repo_build.repository, base_image_id) - next_image = None - if current_image is None: + base_image = model.get_image(repo_build.repository, base_image_id) + if base_image is None: return None - # For each cache comment, find a child image that matches the command. - for cache_command in cache_commands: - full_command = '["/bin/sh", "-c", "%s"]' % cache_command - next_image = model.find_child_image(repo_build.repository, current_image, full_command) - if next_image is None: - break + # Build an in-memory tree of the full heirarchy of images in the repository. + all_images = model.get_repository_images(repo_namespace, repo_name) + all_tags = model.list_repository_tags(repo_namespace, repo_name) + tree = ImageTree(all_images, all_tags, base_filter=base_image.id) - current_image = next_image - logger.debug('Found cached image %s for comment %s', current_image.id, full_command) + # Find a path in the tree, starting at the base image, that matches the cache comments + # or some subset thereof. + def checker(step, image): + if step >= len(cache_commands): + return False - # Find a tag associated with the image, if any. - # TODO(jschorr): We should just return the image ID instead of a parent tag, OR we should - # make this more efficient. - for tag in model.list_repository_tags(repo_namespace, repo_name): - tag_image = tag.image - ancestor_index = '/%s/' % current_image.id - if ancestor_index in tag_image.ancestors or tag_image.id == current_image.id: - return tag.name + full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step] + return image.storage.comment == full_command + + path = tree.find_longest_path(base_image.id, checker) + if not path: + return None + + # Find any tag associated with the last image in the path. + return tree.tag_containing_images(path[-1]) - return None def _determine_cached_tag_by_tag(self): """ Determines the cached tag by looking for one of the tags being built, and seeing if it diff --git a/buildman/jobutil/buildstatus.py b/buildman/jobutil/buildstatus.py index 393ecd3b4..2ae127ee0 100644 --- a/buildman/jobutil/buildstatus.py +++ b/buildman/jobutil/buildstatus.py @@ -7,6 +7,7 @@ class StatusHandler(object): def __init__(self, build_logs, repository_build_uuid): self._current_phase = None + self._current_command = None self._uuid = repository_build_uuid self._build_logs = build_logs @@ -26,9 +27,16 @@ class StatusHandler(object): self._build_logs.append_log_message(self._uuid, log_message, log_type, log_data) def append_log(self, log_message, extra_data=None): + if log_message is None: + return + self._append_log_message(log_message, log_data=extra_data) def set_command(self, command, extra_data=None): + if self._current_command == command: + return + + self._current_command = command self._append_log_message(command, self._build_logs.COMMAND, extra_data) def set_error(self, error_message, extra_data=None, internal_error=False): diff --git a/test/test_imagetree.py b/test/test_imagetree.py new file mode 100644 index 000000000..824709be9 --- /dev/null +++ b/test/test_imagetree.py @@ -0,0 +1,96 @@ +import unittest + +from app import app +from util.imagetree import ImageTree +from initdb import setup_database_for_testing, finished_database_for_testing +from data import model + +NAMESPACE = 'devtable' +SIMPLE_REPO = 'simple' +COMPLEX_REPO = 'complex' + +class TestImageTree(unittest.TestCase): + def setUp(self): + setup_database_for_testing(self) + self.app = app.test_client() + self.ctx = app.test_request_context() + self.ctx.__enter__() + + def tearDown(self): + finished_database_for_testing(self) + self.ctx.__exit__(True, None, None) + + def _get_base_image(self, all_images): + for image in all_images: + if image.ancestors == '/': + return image + + return None + + def test_longest_path_simple_repo(self): + all_images = list(model.get_repository_images(NAMESPACE, SIMPLE_REPO)) + all_tags = list(model.list_repository_tags(NAMESPACE, SIMPLE_REPO)) + tree = ImageTree(all_images, all_tags) + + base_image = self._get_base_image(all_images) + tag_image = all_tags[0].image + + def checker(index, image): + return True + + ancestors = tag_image.ancestors.split('/')[2:-1] # Skip the first image. + result = tree.find_longest_path(base_image.id, checker) + self.assertEquals(3, len(result)) + for index in range(0, 2): + self.assertEquals(int(ancestors[index]), result[index].id) + + self.assertEquals('latest', tree.tag_containing_image(result[-1])) + + def test_longest_path_complex_repo(self): + all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO)) + all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO)) + tree = ImageTree(all_images, all_tags) + + base_image = self._get_base_image(all_images) + + def checker(index, image): + return True + + result = tree.find_longest_path(base_image.id, checker) + self.assertEquals(4, len(result)) + self.assertEquals('v2.0', tree.tag_containing_image(result[-1])) + + def test_filtering(self): + all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO)) + all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO)) + tree = ImageTree(all_images, all_tags, parent_filter=1245) + + base_image = self._get_base_image(all_images) + + def checker(index, image): + return True + + result = tree.find_longest_path(base_image.id, checker) + self.assertEquals(0, len(result)) + + def test_find_tag_parent_image(self): + all_images = list(model.get_repository_images(NAMESPACE, COMPLEX_REPO)) + all_tags = list(model.list_repository_tags(NAMESPACE, COMPLEX_REPO)) + tree = ImageTree(all_images, all_tags) + + base_image = self._get_base_image(all_images) + + def checker(index, image): + return True + + result = tree.find_longest_path(base_image.id, checker) + self.assertEquals(4, len(result)) + + # Only use the first two images. They don't have tags, but the method should + # still return the tag that contains them. + self.assertEquals('v2.0', tree.tag_containing_image(result[0])) + + +if __name__ == '__main__': + unittest.main() + diff --git a/util/imagetree.py b/util/imagetree.py new file mode 100644 index 000000000..39cd5c3c9 --- /dev/null +++ b/util/imagetree.py @@ -0,0 +1,103 @@ +class ImageTreeNode(object): + """ A node in the image tree. """ + def __init__(self, image): + self.image = image + self.parent = None + self.children = [] + self.tags = [] + + def add_child(self, child): + self.children.append(child) + child.parent = self + + def add_tag(self, tag): + self.tags.append(tag) + + +class ImageTree(object): + """ In-memory tree for easy traversal and lookup of images in a repository. """ + + def __init__(self, all_images, all_tags, base_filter=None): + self._tag_map = {} + self._image_map = {} + + self._build(all_images, all_tags, base_filter) + + def _build(self, all_images, all_tags, base_filter=None): + # Build nodes for each of the images. + for image in all_images: + ancestors = image.ancestors.split('/')[1:-1] + + # Filter any unneeded images. + if base_filter is not None: + if image.id != base_filter and not str(base_filter) in ancestors: + continue + + self._image_map[image.id] = ImageTreeNode(image) + + # Connect the nodes to their parents. + for image_node in self._image_map.values(): + image = image_node.image + parent_image_id = image.ancestors.split('/')[-2] if image.ancestors else None + if not parent_image_id: + continue + + parent_node = self._image_map.get(int(parent_image_id)) + if parent_node is not None: + parent_node.add_child(image_node) + + # Build the tag map. + for tag in all_tags: + image_node = self._image_map.get(tag.image.id) + if not image_node: + continue + + self._tag_map = image_node + image_node.add_tag(tag.name) + + + def find_longest_path(self, image_id, checker): + """ Returns a list of images representing the longest path that matches the given + checker function, starting from the given image_id *exclusive*. + """ + start_node = self._image_map.get(image_id) + if not start_node: + return [] + + return self._find_longest_path(start_node, checker, -1)[1:] + + + def _find_longest_path(self, image_node, checker, index): + found_path = [] + + for child_node in image_node.children: + if not checker(index + 1, child_node.image): + continue + + found = self._find_longest_path(child_node, checker, index + 1) + if found and len(found) > len(found_path): + found_path = found + + return [image_node.image] + found_path + + + def tag_containing_image(self, image): + """ Returns the name of the closest tag containing the given image. """ + if not image: + return None + + # Check the current image for a tag. + image_node = self._image_map.get(image.id) + if image_node is None: + return None + + if image_node.tags: + return image_node.tags[0] + + # Check any deriving images for a tag. + for child_node in image_node.children: + found = self.tag_containing_image(child_node.image) + if found is not None: + return found + + return None \ No newline at end of file