From c30214c7a89c726aeec0f98d324f7870c5ba7fec Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 13 Aug 2018 18:09:05 -0400 Subject: [PATCH] Start on a basic registry_model interface and change a single module to use it. This will allow us to completely abstract out how we deal with registry-related tables and ensure that transitioning to the new OCI-like model will be easier to do. --- buildman/jobutil/buildjob.py | 68 ++++--------------- data/model/tag.py | 27 ++++++++ data/registry_model/__init__.py | 3 + data/registry_model/datatypes.py | 20 ++++++ data/registry_model/interface.py | 21 ++++++ data/registry_model/registry_pre_oci_model.py | 27 ++++++++ .../registry_model/test/test_pre_oci_model.py | 40 +++++++++++ 7 files changed, 150 insertions(+), 56 deletions(-) create mode 100644 data/registry_model/__init__.py create mode 100644 data/registry_model/datatypes.py create mode 100644 data/registry_model/interface.py create mode 100644 data/registry_model/registry_pre_oci_model.py create mode 100644 data/registry_model/test/test_pre_oci_model.py diff --git a/buildman/jobutil/buildjob.py b/buildman/jobutil/buildjob.py index 9ea3cac56..253d85ac9 100644 --- a/buildman/jobutil/buildjob.py +++ b/buildman/jobutil/buildjob.py @@ -5,6 +5,8 @@ from app import app from cachetools import lru_cache from notifications import spawn_notification from data import model +from data.registry_model import registry_model +from data.registry_model.datatypes import RepositoryReference from data.database import UseThenDisconnect from util.imagetree import ImageTree from util.morecollections import AttrDict @@ -27,7 +29,7 @@ class BuildJob(object): self.build_notifier = BuildJobNotifier(self.build_uuid) except ValueError: raise BuildJobLoadException( - 'Could not parse build queue item config with ID %s' % self.job_details['build_uuid'] + 'Could not parse build queue item config with ID %s' % self.job_details['build_uuid'] ) @property @@ -95,70 +97,24 @@ class BuildJob(object): def determine_cached_tag(self, base_image_id=None, cache_comments=None): """ Returns the tag to pull to prime the cache or None if none. """ - cached_tag = None - if base_image_id and cache_comments: - cached_tag = self._determine_cached_tag_by_comments(base_image_id, cache_comments) - - if not cached_tag: - cached_tag = self._determine_cached_tag_by_tag() - + cached_tag = self._determine_cached_tag_by_tag() logger.debug('Determined cached tag %s for %s: %s', cached_tag, base_image_id, cache_comments) - return cached_tag - def _determine_cached_tag_by_comments(self, base_image_id, cache_commands): - """ Determines the tag to use for priming the cache for this build job, by matching commands - starting at the given base_image_id. This mimics the Docker cache checking, so it should, - in theory, provide "perfect" caching. - """ - with UseThenDisconnect(app.config): - # Lookup the base image in the repository. If it doesn't exist, nothing more to do. - repo_build = self.repo_build - repo_namespace = repo_build.repository.namespace_user.username - repo_name = repo_build.repository.name - - base_image = model.image.get_image(repo_build.repository, base_image_id) - if base_image is None: - return None - - # Build an in-memory tree of the full heirarchy of images in the repository. - all_images = model.image.get_repository_images_without_placements(repo_build.repository, - with_ancestor=base_image) - - all_tags = model.tag.list_repository_tags(repo_namespace, repo_name) - tree = ImageTree(all_images, all_tags, base_filter=base_image.id) - - # Find a path in the tree, starting at the base image, that matches the cache comments - # or some subset thereof. - def checker(step, image): - if step >= len(cache_commands): - return False - - full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step] - logger.debug('Checking step #%s: %s, %s == %s', step, image.id, image.command, full_command) - - return image.command == full_command - - path = tree.find_longest_path(base_image.id, checker) - if not path: - return None - - # Find any tag associated with the last image in the path. - return tree.tag_containing_image(path[-1]) - - def _determine_cached_tag_by_tag(self): """ Determines the cached tag by looking for one of the tags being built, and seeing if it exists in the repository. This is a fallback for when no comment information is available. """ with UseThenDisconnect(app.config): tags = self.build_config.get('docker_tags', ['latest']) - repository = self.repo_build.repository - existing_tags = model.tag.list_repository_tags(repository.namespace_user.username, - repository.name) - cached_tags = set(tags) & set([tag.name for tag in existing_tags]) - if cached_tags: - return list(cached_tags)[0] + repository = RepositoryReference.for_repo_obj(self.repo_build.repository) + matching_tag = registry_model.find_matching_tag(repository, tags) + if matching_tag is not None: + return matching_tag.name + + most_recent_tag = registry_model.get_most_recent_tag(repository) + if most_recent_tag is not None: + return most_recent_tag.name return None diff --git a/data/model/tag.py b/data/model/tag.py index ef49cf51f..9748b5b73 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -722,3 +722,30 @@ def change_tag_expiration(tag, expiration_date): .execute()) return (tag.lifetime_end_ts, result > 0) + + +def find_matching_tag(repo_id, tag_names): + """ Finds the most recently pushed alive tag in the repository with one of the given names, + if any. + """ + try: + return (_tag_alive(RepositoryTag + .select() + .where(RepositoryTag.repository == repo_id, + RepositoryTag.name << list(tag_names)) + .order_by(RepositoryTag.lifetime_start_ts.desc())) + .get()) + except RepositoryTag.DoesNotExist: + return None + + +def get_most_recent_tag(repo_id): + """ Returns the most recently pushed alive tag in the repository, or None if none. """ + try: + return (_tag_alive(RepositoryTag + .select() + .where(RepositoryTag.repository == repo_id) + .order_by(RepositoryTag.lifetime_start_ts.desc())) + .get()) + except RepositoryTag.DoesNotExist: + return None diff --git a/data/registry_model/__init__.py b/data/registry_model/__init__.py new file mode 100644 index 000000000..484bb7e41 --- /dev/null +++ b/data/registry_model/__init__.py @@ -0,0 +1,3 @@ +from data.registry_model.registry_pre_oci_model import pre_oci_model + +registry_model = pre_oci_model diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py new file mode 100644 index 000000000..3f2cae187 --- /dev/null +++ b/data/registry_model/datatypes.py @@ -0,0 +1,20 @@ +from collections import namedtuple + +class RepositoryReference(object): + """ RepositoryReference is a reference to a repository, passed to registry interface methods. """ + def __init__(self, repo_id): + self.repo_id = repo_id + + @classmethod + def for_repo_obj(cls, repo_obj): + return RepositoryReference(repo_obj.id) + + +class Tag(namedtuple('Tag', ['id', 'name'])): + """ Tag represents a tag in a repository, which points to a manifest or image. """ + @classmethod + def for_repository_tag(cls, repository_tag): + if repository_tag is None: + return None + + return Tag(id=repository_tag.id, name=repository_tag.name) diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py new file mode 100644 index 000000000..e67366733 --- /dev/null +++ b/data/registry_model/interface.py @@ -0,0 +1,21 @@ +from abc import ABCMeta, abstractmethod +from six import add_metaclass + +@add_metaclass(ABCMeta) +class RegistryDataInterface(object): + """ Interface for code to work with the registry data model. The registry data model consists + of all tables that store registry-specific information, such as Manifests, Blobs, Images, + and Labels. + """ + + @abstractmethod + def find_matching_tag(self, repository_ref, tag_names): + """ Finds an alive tag in the repository matching one of the given tag names and returns it + or None if none. + """ + + @abstractmethod + def get_most_recent_tag(self, repository_ref): + """ Returns the most recently pushed alive tag in the repository, if any. If none, returns + None. + """ diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py new file mode 100644 index 000000000..5cd6ed631 --- /dev/null +++ b/data/registry_model/registry_pre_oci_model.py @@ -0,0 +1,27 @@ +from data import model +from data.registry_model.interface import RegistryDataInterface +from data.registry_model.datatypes import Tag + + +class PreOCIModel(RegistryDataInterface): + """ + PreOCIModel implements the data model for the registry API using a database schema + before it was changed to support the OCI specification. + """ + + def find_matching_tag(self, repository_ref, tag_names): + """ Finds an alive tag in the repository matching one of the given tag names and returns it + or None if none. + """ + found_tag = model.tag.find_matching_tag(repository_ref.repo_id, tag_names) + return Tag.for_repository_tag(found_tag) + + def get_most_recent_tag(self, repository_ref): + """ Returns the most recently pushed alive tag in the repository, if any. If none, returns + None. + """ + found_tag = model.tag.get_most_recent_tag(repository_ref.repo_id) + return Tag.for_repository_tag(found_tag) + + +pre_oci_model = PreOCIModel() diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py new file mode 100644 index 000000000..3d7140475 --- /dev/null +++ b/data/registry_model/test/test_pre_oci_model.py @@ -0,0 +1,40 @@ +import pytest + +from data import model +from data.registry_model.registry_pre_oci_model import PreOCIModel +from data.registry_model.datatypes import RepositoryReference +from test.fixtures import * + +@pytest.fixture() +def pre_oci_model(initialized_db): + return PreOCIModel() + + +@pytest.mark.parametrize('names, expected', [ + (['unknown'], None), + (['latest'], 'latest'), + (['latest', 'prod'], 'latest'), + (['foo', 'prod'], 'prod'), +]) +def test_find_matching_tag(names, expected, pre_oci_model): + repo = model.repository.get_repository('devtable', 'simple') + repository_ref = RepositoryReference.for_repo_obj(repo) + found = pre_oci_model.find_matching_tag(repository_ref, names) + if expected is None: + assert found is None + else: + assert found.name == expected + + +@pytest.mark.parametrize('repo_namespace, repo_name, expected', [ + ('devtable', 'simple', 'latest'), + ('buynlarge', 'orgrepo', 'latest'), +]) +def test_get_most_recent_tag(repo_namespace, repo_name, expected, pre_oci_model): + repo = model.repository.get_repository(repo_namespace, repo_name) + repository_ref = RepositoryReference.for_repo_obj(repo) + found = pre_oci_model.get_most_recent_tag(repository_ref) + if expected is None: + assert found is None + else: + assert found.name == expected