import logging from uuid import uuid4 from peewee import IntegrityError, JOIN_LEFT_OUTER, fn from data.model import (image, db_transaction, DataModelException, _basequery, InvalidManifestException, TagAlreadyCreatedException, StaleTagException) from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest, RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp, db_for_update) logger = logging.getLogger(__name__) def get_max_id_for_sec_scan(): """ Gets the maximum id for security scanning """ return RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar() def get_min_id_for_sec_scan(version): """ Gets the minimum id for a security scanning """ return _tag_alive(RepositoryTag .select(fn.Min(RepositoryTag.id)) .join(Image) .where(Image.security_indexed_engine < version)).scalar() def get_tag_pk_field(): """ Returns the primary key for Image DB model """ return RepositoryTag.id def get_tags_images_eligible_for_scan(clair_version): Parent = Image.alias() ParentImageStorage = ImageStorage.alias() return _tag_alive(RepositoryTag .select(Image, ImageStorage, Parent, ParentImageStorage, RepositoryTag) .join(Image, on=(RepositoryTag.image == Image.id)) .join(ImageStorage, on=(Image.storage == ImageStorage.id)) .switch(Image) .join(Parent, JOIN_LEFT_OUTER, on=(Image.parent == Parent.id)) .join(ParentImageStorage, JOIN_LEFT_OUTER, on=(ParentImageStorage.id == Parent.storage)) .where(RepositoryTag.hidden == False) .where(Image.security_indexed_engine < clair_version)) def _tag_alive(query, now_ts=None): if now_ts is None: now_ts = get_epoch_timestamp() return query.where((RepositoryTag.lifetime_end_ts >> None) | (RepositoryTag.lifetime_end_ts > now_ts)) def filter_has_repository_event(query, event): """ Filters the query by ensuring the repositories returned have the given event. """ return (query .join(Repository) .join(RepositoryNotification) .where(RepositoryNotification.event == event)) def filter_tags_have_repository_event(query, event): """ Filters the query by ensuring the repository tags live in a repository that has the given event. Also returns the image storage for the tag's image and orders the results by lifetime_start_ts. """ query = filter_has_repository_event(query, event) query = query.switch(Image).join(ImageStorage) query = query.switch(RepositoryTag).order_by(RepositoryTag.lifetime_start_ts.desc()) return query _MAX_SUB_QUERIES = 100 _MAX_IMAGE_LOOKUP_COUNT = 500 def get_matching_tags_for_images(image_pairs, filter_images=None, filter_tags=None, selections=None): """ Returns all tags that contain the images with the given docker_image_id and storage_uuid, as specified as an iterable of pairs. """ if not image_pairs: return [] image_pairs_set = set(image_pairs) # Find all possible matching image+storages. images = [] while image_pairs: image_pairs_slice = image_pairs[:_MAX_IMAGE_LOOKUP_COUNT] ids = [pair[0] for pair in image_pairs_slice] uuids = [pair[1] for pair in image_pairs_slice] images_query = (Image .select(Image.id, Image.docker_image_id, Image.ancestors, ImageStorage.uuid) .join(ImageStorage) .where(Image.docker_image_id << ids, ImageStorage.uuid << uuids) .switch(Image)) if filter_images is not None: images_query = filter_images(images_query) images.extend(list(images_query)) image_pairs = image_pairs[_MAX_IMAGE_LOOKUP_COUNT:] # Filter down to those images actually in the pairs set and build the set of queries to run. individual_image_queries = [] for img in images: # Make sure the image found is in the set of those requested, and that we haven't already # processed it. We need this check because the query above checks for images with matching # IDs OR storage UUIDs, rather than the expected ID+UUID pair. We do this for efficiency # reasons, and it is highly unlikely we'll find an image with a mismatch, but we need this # check to be absolutely sure. pair = (img.docker_image_id, img.storage.uuid) if pair not in image_pairs_set: continue # Remove the pair so we don't try it again. image_pairs_set.remove(pair) ancestors_str = '%s%s/%%' % (img.ancestors, img.id) query = (Image .select(Image.id) .where((Image.id == img.id) | (Image.ancestors ** ancestors_str))) individual_image_queries.append(query) if not individual_image_queries: return [] # Shard based on the max subquery count. This is used to prevent going over the DB's max query # size, as well as to prevent the DB from locking up on a massive query. sharded_queries = [] while individual_image_queries: shard = individual_image_queries[:_MAX_SUB_QUERIES] sharded_queries.append(_basequery.reduce_as_tree(shard)) individual_image_queries = individual_image_queries[_MAX_SUB_QUERIES:] # Collect IDs of the tags found for each query. tags = {} for query in sharded_queries: tag_query = (_tag_alive(RepositoryTag .select(*(selections or [])) .distinct() .join(Image) .where(RepositoryTag.hidden == False) .where(Image.id << query) .switch(RepositoryTag))) if filter_tags is not None: tag_query = filter_tags(tag_query) for tag in tag_query: tags[tag.id] = tag return tags.values() def get_matching_tags(docker_image_id, storage_uuid, *args): """ Returns a query pointing to all tags that contain the image with the given docker_image_id and storage_uuid. """ image_row = image.get_image_with_storage(docker_image_id, storage_uuid) if image_row is None: return RepositoryTag.select().where(RepositoryTag.id < 0) # Empty query. ancestors_str = '%s%s/%%' % (image_row.ancestors, image_row.id) return _tag_alive(RepositoryTag .select(*args) .distinct() .join(Image) .join(ImageStorage) .where(RepositoryTag.hidden == False) .where((Image.id == image_row.id) | (Image.ancestors ** ancestors_str))) def get_tags_for_image(image_id, *args): return _tag_alive(RepositoryTag .select(*args) .distinct() .where(RepositoryTag.image == image_id, RepositoryTag.hidden == False)) def get_tag_manifest_digests(tags): """ Returns a map from tag ID to its associated manifest digest, if any. """ if not tags: return dict() manifests = (TagManifest .select(TagManifest.tag, TagManifest.digest) .where(TagManifest.tag << [t.id for t in tags])) return {manifest.tag_id: manifest.digest for manifest in manifests} def list_active_repo_tags(repo): """ Returns all of the active, non-hidden tags in a repository, joined to they images and (if present), their manifest. """ query = _tag_alive(RepositoryTag .select(RepositoryTag, Image, TagManifest.digest) .join(Image) .where(RepositoryTag.repository == repo, RepositoryTag.hidden == False) .switch(RepositoryTag) .join(TagManifest, JOIN_LEFT_OUTER)) return query def list_repository_tags(namespace_name, repository_name, include_hidden=False, include_storage=False): to_select = (RepositoryTag, Image) if include_storage: to_select = (RepositoryTag, Image, ImageStorage) query = _tag_alive(RepositoryTag .select(*to_select) .join(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .switch(RepositoryTag) .join(Image) .where(Repository.name == repository_name, Namespace.username == namespace_name)) if not include_hidden: query = query.where(RepositoryTag.hidden == False) if include_storage: query = query.switch(Image).join(ImageStorage) return query def create_or_update_tag(namespace_name, repository_name, tag_name, tag_docker_image_id, reversion=False): try: repo = _basequery.get_existing_repository(namespace_name, repository_name) except Repository.DoesNotExist: raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) now_ts = get_epoch_timestamp() with db_transaction(): try: tag = db_for_update(_tag_alive(RepositoryTag .select() .where(RepositoryTag.repository == repo, RepositoryTag.name == tag_name), now_ts)).get() tag.lifetime_end_ts = now_ts tag.save() except RepositoryTag.DoesNotExist: pass except IntegrityError: msg = 'Tag with name %s was stale when we tried to update it; Please retry the push' raise StaleTagException(msg % tag_name) try: image_obj = Image.get(Image.docker_image_id == tag_docker_image_id, Image.repository == repo) except Image.DoesNotExist: raise DataModelException('Invalid image with id: %s' % tag_docker_image_id) try: return RepositoryTag.create(repository=repo, image=image_obj, name=tag_name, lifetime_start_ts=now_ts, reversion=reversion) except IntegrityError: msg = 'Tag with name %s and lifetime start %s under repository %s/%s already exists' raise TagAlreadyCreatedException(msg % (tag_name, now_ts, namespace_name, repository_name)) def create_temporary_hidden_tag(repo, image_obj, expiration_s): """ Create a tag with a defined timeline, that will not appear in the UI or CLI. Returns the name of the temporary tag. """ now_ts = get_epoch_timestamp() expire_ts = now_ts + expiration_s tag_name = str(uuid4()) RepositoryTag.create(repository=repo, image=image_obj, name=tag_name, lifetime_start_ts=now_ts, lifetime_end_ts=expire_ts, hidden=True) return tag_name def delete_tag(namespace_name, repository_name, tag_name): now_ts = get_epoch_timestamp() with db_transaction(): try: query = _tag_alive(RepositoryTag .select(RepositoryTag, Repository) .join(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .where(Repository.name == repository_name, Namespace.username == namespace_name, RepositoryTag.name == tag_name), now_ts) found = db_for_update(query).get() except RepositoryTag.DoesNotExist: msg = ('Invalid repository tag \'%s\' on repository \'%s/%s\'' % (tag_name, namespace_name, repository_name)) raise DataModelException(msg) found.lifetime_end_ts = now_ts found.save() return found def garbage_collect_tags(repo): """ Remove all of the tags that have gone past their garbage collection expiration window, and return a set of image ids which *may* have been orphaned. """ def add_expiration_data(base_query): expired_clause = get_epoch_timestamp() - Namespace.removed_tag_expiration_s return (base_query .switch(RepositoryTag) .join(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .where(~(RepositoryTag.lifetime_end_ts >> None), RepositoryTag.lifetime_end_ts <= expired_clause)) return _delete_tags(repo, add_expiration_data) def purge_all_tags(repo): """ Remove all tags from the repository, and return a set of all of the images ids which are now orphaned. """ return _delete_tags(repo) def _delete_tags(repo, query_modifier=None): """ Garbage collect the tags for a repository and return a set of the image ids which may now be orphaned. """ tags_to_delete_q = (RepositoryTag .select(RepositoryTag.id, Image.ancestors, Image.id) .join(Image) .where(RepositoryTag.repository == repo)) if query_modifier is not None: tags_to_delete_q = query_modifier(tags_to_delete_q) tags_to_delete = list(tags_to_delete_q) if len(tags_to_delete) == 0: return set() with db_transaction(): manifests_to_delete = list(TagManifest .select(TagManifest.id) .join(RepositoryTag) .where(RepositoryTag.id << tags_to_delete)) num_deleted_manifests = 0 if len(manifests_to_delete) > 0: # Find the set of IDs for all the labels to delete. manifest_labels_query = (TagManifestLabel .select() .where(TagManifestLabel.repository == repo, TagManifestLabel.annotated << manifests_to_delete)) label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] if label_ids: # Delete all the mapping entries. (TagManifestLabel .delete() .where(TagManifestLabel.repository == repo, TagManifestLabel.annotated << manifests_to_delete) .execute()) # Delete all the matching labels. Label.delete().where(Label.id << label_ids).execute() num_deleted_manifests = (TagManifest .delete() .where(TagManifest.id << manifests_to_delete) .execute()) num_deleted_tags = (RepositoryTag .delete() .where(RepositoryTag.id << tags_to_delete) .execute()) logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests) ancestors = reduce(lambda r, l: r | l, (set(tag.image.ancestor_id_list()) for tag in tags_to_delete)) direct_referenced = {tag.image.id for tag in tags_to_delete} return ancestors | direct_referenced def _get_repo_tag_image(tag_name, include_storage, modifier): query = Image.select().join(RepositoryTag) if include_storage: query = (Image .select(Image, ImageStorage) .join(ImageStorage) .switch(Image) .join(RepositoryTag)) images = _tag_alive(modifier(query.where(RepositoryTag.name == tag_name))) if not images: raise DataModelException('Unable to find image for tag.') else: return images[0] def get_repo_tag_image(repo, tag_name, include_storage=False): def modifier(query): return query.where(RepositoryTag.repository == repo) return _get_repo_tag_image(tag_name, include_storage, modifier) def get_tag_image(namespace_name, repository_name, tag_name, include_storage=False): def modifier(query): return (query .switch(RepositoryTag) .join(Repository) .join(Namespace) .where(Namespace.username == namespace_name, Repository.name == repository_name)) return _get_repo_tag_image(tag_name, include_storage, modifier) def list_repository_tag_history(repo_obj, page=1, size=100, specific_tag=None): query = (RepositoryTag .select(RepositoryTag, Image) .join(Image) .switch(RepositoryTag) .where(RepositoryTag.repository == repo_obj) .where(RepositoryTag.hidden == False) .order_by(RepositoryTag.lifetime_start_ts.desc(), RepositoryTag.name) .limit(size + 1) .offset(size * (page - 1))) if specific_tag: query = query.where(RepositoryTag.name == specific_tag) tags = list(query) if not tags: return [], {}, False manifest_map = get_tag_manifest_digests(tags) return tags[0:size], manifest_map, len(tags) > size def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest): """ Restores a tag to a specific manifest digest. """ with db_transaction(): # Verify that the manifest digest already existed under this repository under the # tag. try: manifest = (TagManifest .select(TagManifest, RepositoryTag, Image) .join(RepositoryTag) .join(Image) .where(RepositoryTag.repository == repo_obj) .where(RepositoryTag.name == tag_name) .where(TagManifest.digest == manifest_digest) .get()) except TagManifest.DoesNotExist: raise DataModelException('Cannot restore to unknown or invalid digest') # Lookup the existing image, if any. try: existing_image = get_repo_tag_image(repo_obj, tag_name) except DataModelException: existing_image = None docker_image_id = manifest.tag.image.docker_image_id store_tag_manifest(repo_obj.namespace_user.username, repo_obj.name, tag_name, docker_image_id, manifest_digest, manifest.json_data, reversion=True) return existing_image def restore_tag_to_image(repo_obj, tag_name, docker_image_id): """ Restores a tag to a specific image ID. """ with db_transaction(): # Verify that the image ID already existed under this repository under the # tag. try: (RepositoryTag .select() .join(Image) .where(RepositoryTag.repository == repo_obj) .where(RepositoryTag.name == tag_name) .where(Image.docker_image_id == docker_image_id) .get()) except RepositoryTag.DoesNotExist: raise DataModelException('Cannot restore to unknown or invalid image') # Lookup the existing image, if any. try: existing_image = get_repo_tag_image(repo_obj, tag_name) except DataModelException: existing_image = None create_or_update_tag(repo_obj.namespace_user.username, repo_obj.name, tag_name, docker_image_id, reversion=True) return existing_image def store_tag_manifest(namespace, repo_name, tag_name, docker_image_id, manifest_digest, manifest_data, reversion=False): """ Stores a tag manifest for a specific tag name in the database. Returns the TagManifest object, as well as a boolean indicating whether the TagManifest was created. """ with db_transaction(): tag = create_or_update_tag(namespace, repo_name, tag_name, docker_image_id, reversion=reversion) try: manifest = TagManifest.get(digest=manifest_digest) manifest.tag = tag manifest.save() return manifest, False except TagManifest.DoesNotExist: return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data), True def get_active_tag(namespace, repo_name, tag_name): return _tag_alive(RepositoryTag .select() .join(Repository) .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .where(RepositoryTag.name == tag_name, Repository.name == repo_name, Namespace.username == namespace)).get() def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest_digest, manifest_data): tag = get_active_tag(namespace, repo_name, tag_name) return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data) def load_tag_manifest(namespace, repo_name, tag_name): try: return (_load_repo_manifests(namespace, repo_name) .where(RepositoryTag.name == tag_name) .get()) except TagManifest.DoesNotExist: msg = 'Manifest not found for tag {0} in repo {1}/{2}'.format(tag_name, namespace, repo_name) raise InvalidManifestException(msg) def delete_manifest_by_digest(namespace, repo_name, digest): tag_manifests = list(_load_repo_manifests(namespace, repo_name) .where(TagManifest.digest == digest)) for tag_manifest in tag_manifests: delete_tag(namespace, repo_name, tag_manifest.tag.name) return [tag_manifest.tag for tag_manifest in tag_manifests] def load_manifest_by_digest(namespace, repo_name, digest): try: return (_load_repo_manifests(namespace, repo_name) .where(TagManifest.digest == digest) .get()) except TagManifest.DoesNotExist: msg = 'Manifest not found with digest {0} in repo {1}/{2}'.format(digest, namespace, repo_name) raise InvalidManifestException(msg) def _load_repo_manifests(namespace, repo_name): return _tag_alive(TagManifest .select(TagManifest, RepositoryTag) .join(RepositoryTag) .join(Image) .join(Repository) .join(Namespace, on=(Namespace.id == Repository.namespace_user)) .where(Repository.name == repo_name, Namespace.username == namespace))