This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/data/model/tag.py
Joseph Schorr 7d4fed6892 Change error message when trying to pull a deleted or expired tag
Will let the users know they can recover the tag via time machine

Note: This was tested with the Docker protocol, but the new error code is *technically* out of spec; we should make sure its okay.
2017-07-19 17:13:48 -04:00

623 lines
23 KiB
Python

import logging
import time
from calendar import timegm
from uuid import uuid4
from peewee import IntegrityError, JOIN_LEFT_OUTER, fn
from data.model import (image, db_transaction, DataModelException, _basequery,
InvalidManifestException, TagAlreadyCreatedException, StaleTagException,
config)
from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest,
RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp,
db_for_update)
from util.timedeltastring import convert_to_timedelta
logger = logging.getLogger(__name__)
def get_max_id_for_sec_scan():
""" Gets the maximum id for security scanning """
return RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar()
def get_min_id_for_sec_scan(version):
""" Gets the minimum id for a security scanning """
return _tag_alive(RepositoryTag
.select(fn.Min(RepositoryTag.id))
.join(Image)
.where(Image.security_indexed_engine < version)).scalar()
def get_tag_pk_field():
""" Returns the primary key for Image DB model """
return RepositoryTag.id
def get_tags_images_eligible_for_scan(clair_version):
Parent = Image.alias()
ParentImageStorage = ImageStorage.alias()
return _tag_alive(RepositoryTag
.select(Image, ImageStorage, Parent, ParentImageStorage, RepositoryTag)
.join(Image, on=(RepositoryTag.image == Image.id))
.join(ImageStorage, on=(Image.storage == ImageStorage.id))
.switch(Image)
.join(Parent, JOIN_LEFT_OUTER, on=(Image.parent == Parent.id))
.join(ParentImageStorage, JOIN_LEFT_OUTER, on=(ParentImageStorage.id == Parent.storage))
.where(RepositoryTag.hidden == False)
.where(Image.security_indexed_engine < clair_version))
def _tag_alive(query, now_ts=None):
if now_ts is None:
now_ts = get_epoch_timestamp()
return query.where((RepositoryTag.lifetime_end_ts >> None) |
(RepositoryTag.lifetime_end_ts > now_ts))
def filter_has_repository_event(query, event):
""" Filters the query by ensuring the repositories returned have the given event. """
return (query
.join(Repository)
.join(RepositoryNotification)
.where(RepositoryNotification.event == event))
def filter_tags_have_repository_event(query, event):
""" Filters the query by ensuring the repository tags live in a repository that has the given
event. Also returns the image storage for the tag's image and orders the results by
lifetime_start_ts.
"""
query = filter_has_repository_event(query, event)
query = query.switch(Image).join(ImageStorage)
query = query.switch(RepositoryTag).order_by(RepositoryTag.lifetime_start_ts.desc())
return query
_MAX_SUB_QUERIES = 100
_MAX_IMAGE_LOOKUP_COUNT = 500
def get_matching_tags_for_images(image_pairs, filter_images=None, filter_tags=None,
selections=None):
""" Returns all tags that contain the images with the given docker_image_id and storage_uuid,
as specified as an iterable of pairs. """
if not image_pairs:
return []
image_pairs_set = set(image_pairs)
# Find all possible matching image+storages.
images = []
while image_pairs:
image_pairs_slice = image_pairs[:_MAX_IMAGE_LOOKUP_COUNT]
ids = [pair[0] for pair in image_pairs_slice]
uuids = [pair[1] for pair in image_pairs_slice]
images_query = (Image
.select(Image.id, Image.docker_image_id, Image.ancestors, ImageStorage.uuid)
.join(ImageStorage)
.where(Image.docker_image_id << ids, ImageStorage.uuid << uuids)
.switch(Image))
if filter_images is not None:
images_query = filter_images(images_query)
images.extend(list(images_query))
image_pairs = image_pairs[_MAX_IMAGE_LOOKUP_COUNT:]
# Filter down to those images actually in the pairs set and build the set of queries to run.
individual_image_queries = []
for img in images:
# Make sure the image found is in the set of those requested, and that we haven't already
# processed it. We need this check because the query above checks for images with matching
# IDs OR storage UUIDs, rather than the expected ID+UUID pair. We do this for efficiency
# reasons, and it is highly unlikely we'll find an image with a mismatch, but we need this
# check to be absolutely sure.
pair = (img.docker_image_id, img.storage.uuid)
if pair not in image_pairs_set:
continue
# Remove the pair so we don't try it again.
image_pairs_set.remove(pair)
ancestors_str = '%s%s/%%' % (img.ancestors, img.id)
query = (Image
.select(Image.id)
.where((Image.id == img.id) | (Image.ancestors ** ancestors_str)))
individual_image_queries.append(query)
if not individual_image_queries:
return []
# Shard based on the max subquery count. This is used to prevent going over the DB's max query
# size, as well as to prevent the DB from locking up on a massive query.
sharded_queries = []
while individual_image_queries:
shard = individual_image_queries[:_MAX_SUB_QUERIES]
sharded_queries.append(_basequery.reduce_as_tree(shard))
individual_image_queries = individual_image_queries[_MAX_SUB_QUERIES:]
# Collect IDs of the tags found for each query.
tags = {}
for query in sharded_queries:
tag_query = (_tag_alive(RepositoryTag
.select(*(selections or []))
.distinct()
.join(Image)
.where(RepositoryTag.hidden == False)
.where(Image.id << query)
.switch(RepositoryTag)))
if filter_tags is not None:
tag_query = filter_tags(tag_query)
for tag in tag_query:
tags[tag.id] = tag
return tags.values()
def get_matching_tags(docker_image_id, storage_uuid, *args):
""" Returns a query pointing to all tags that contain the image with the
given docker_image_id and storage_uuid. """
image_row = image.get_image_with_storage(docker_image_id, storage_uuid)
if image_row is None:
return RepositoryTag.select().where(RepositoryTag.id < 0) # Empty query.
ancestors_str = '%s%s/%%' % (image_row.ancestors, image_row.id)
return _tag_alive(RepositoryTag
.select(*args)
.distinct()
.join(Image)
.join(ImageStorage)
.where(RepositoryTag.hidden == False)
.where((Image.id == image_row.id) |
(Image.ancestors ** ancestors_str)))
def get_tags_for_image(image_id, *args):
return _tag_alive(RepositoryTag
.select(*args)
.distinct()
.where(RepositoryTag.image == image_id,
RepositoryTag.hidden == False))
def get_tag_manifest_digests(tags):
""" Returns a map from tag ID to its associated manifest digest, if any. """
if not tags:
return dict()
manifests = (TagManifest
.select(TagManifest.tag, TagManifest.digest)
.where(TagManifest.tag << [t.id for t in tags]))
return {manifest.tag_id: manifest.digest for manifest in manifests}
def list_active_repo_tags(repo):
""" Returns all of the active, non-hidden tags in a repository, joined to they images
and (if present), their manifest.
"""
query = _tag_alive(RepositoryTag
.select(RepositoryTag, Image, TagManifest.digest)
.join(Image)
.where(RepositoryTag.repository == repo, RepositoryTag.hidden == False)
.switch(RepositoryTag)
.join(TagManifest, JOIN_LEFT_OUTER))
return query
def list_repository_tags(namespace_name, repository_name, include_hidden=False,
include_storage=False):
to_select = (RepositoryTag, Image)
if include_storage:
to_select = (RepositoryTag, Image, ImageStorage)
query = _tag_alive(RepositoryTag
.select(*to_select)
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.switch(RepositoryTag)
.join(Image)
.where(Repository.name == repository_name,
Namespace.username == namespace_name))
if not include_hidden:
query = query.where(RepositoryTag.hidden == False)
if include_storage:
query = query.switch(Image).join(ImageStorage)
return query
def create_or_update_tag(namespace_name, repository_name, tag_name, tag_docker_image_id,
reversion=False):
try:
repo = _basequery.get_existing_repository(namespace_name, repository_name)
except Repository.DoesNotExist:
raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name))
now_ts = get_epoch_timestamp()
with db_transaction():
try:
tag = db_for_update(_tag_alive(RepositoryTag
.select()
.where(RepositoryTag.repository == repo,
RepositoryTag.name == tag_name), now_ts)).get()
tag.lifetime_end_ts = now_ts
tag.save()
except RepositoryTag.DoesNotExist:
pass
except IntegrityError:
msg = 'Tag with name %s was stale when we tried to update it; Please retry the push'
raise StaleTagException(msg % tag_name)
try:
image_obj = Image.get(Image.docker_image_id == tag_docker_image_id, Image.repository == repo)
except Image.DoesNotExist:
raise DataModelException('Invalid image with id: %s' % tag_docker_image_id)
try:
return RepositoryTag.create(repository=repo, image=image_obj, name=tag_name,
lifetime_start_ts=now_ts, reversion=reversion)
except IntegrityError:
msg = 'Tag with name %s and lifetime start %s under repository %s/%s already exists'
raise TagAlreadyCreatedException(msg % (tag_name, now_ts, namespace_name, repository_name))
def create_temporary_hidden_tag(repo, image_obj, expiration_s):
""" Create a tag with a defined timeline, that will not appear in the UI or CLI. Returns the name
of the temporary tag. """
now_ts = get_epoch_timestamp()
expire_ts = now_ts + expiration_s
tag_name = str(uuid4())
RepositoryTag.create(repository=repo, image=image_obj, name=tag_name, lifetime_start_ts=now_ts,
lifetime_end_ts=expire_ts, hidden=True)
return tag_name
def delete_tag(namespace_name, repository_name, tag_name):
now_ts = get_epoch_timestamp()
with db_transaction():
try:
query = _tag_alive(RepositoryTag
.select(RepositoryTag, Repository)
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.name == repository_name,
Namespace.username == namespace_name,
RepositoryTag.name == tag_name), now_ts)
found = db_for_update(query).get()
except RepositoryTag.DoesNotExist:
msg = ('Invalid repository tag \'%s\' on repository \'%s/%s\'' %
(tag_name, namespace_name, repository_name))
raise DataModelException(msg)
found.lifetime_end_ts = now_ts
found.save()
return found
def garbage_collect_tags(repo):
""" Remove all of the tags that have gone past their garbage collection
expiration window, and return a set of image ids which *may* have been
orphaned.
"""
def add_expiration_data(base_query):
expired_clause = get_epoch_timestamp() - Namespace.removed_tag_expiration_s
return (base_query
.switch(RepositoryTag)
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(~(RepositoryTag.lifetime_end_ts >> None),
RepositoryTag.lifetime_end_ts <= expired_clause))
return _delete_tags(repo, add_expiration_data)
def purge_all_tags(repo):
""" Remove all tags from the repository, and return a set of all of the images
ids which are now orphaned.
"""
return _delete_tags(repo)
def _delete_tags(repo, query_modifier=None):
""" Garbage collect the tags for a repository and return a set of the image
ids which may now be orphaned.
"""
tags_to_delete_q = (RepositoryTag
.select(RepositoryTag.id, Image.ancestors, Image.id)
.join(Image)
.where(RepositoryTag.repository == repo))
if query_modifier is not None:
tags_to_delete_q = query_modifier(tags_to_delete_q)
tags_to_delete = list(tags_to_delete_q)
if len(tags_to_delete) == 0:
return set()
with db_transaction():
manifests_to_delete = list(TagManifest
.select(TagManifest.id)
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
num_deleted_manifests = 0
if len(manifests_to_delete) > 0:
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete))
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
if label_ids:
# Delete all the mapping entries.
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete)
.execute())
# Delete all the matching labels.
Label.delete().where(Label.id << label_ids).execute()
num_deleted_manifests = (TagManifest
.delete()
.where(TagManifest.id << manifests_to_delete)
.execute())
num_deleted_tags = (RepositoryTag
.delete()
.where(RepositoryTag.id << tags_to_delete)
.execute())
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
ancestors = reduce(lambda r, l: r | l,
(set(tag.image.ancestor_id_list()) for tag in tags_to_delete))
direct_referenced = {tag.image.id for tag in tags_to_delete}
return ancestors | direct_referenced
def _get_repo_tag_image(tag_name, include_storage, modifier):
query = Image.select().join(RepositoryTag)
if include_storage:
query = (Image
.select(Image, ImageStorage)
.join(ImageStorage)
.switch(Image)
.join(RepositoryTag))
images = _tag_alive(modifier(query.where(RepositoryTag.name == tag_name)))
if not images:
raise DataModelException('Unable to find image for tag.')
else:
return images[0]
def get_repo_tag_image(repo, tag_name, include_storage=False):
def modifier(query):
return query.where(RepositoryTag.repository == repo)
return _get_repo_tag_image(tag_name, include_storage, modifier)
def get_tag_image(namespace_name, repository_name, tag_name, include_storage=False):
def modifier(query):
return (query
.switch(RepositoryTag)
.join(Repository)
.join(Namespace)
.where(Namespace.username == namespace_name, Repository.name == repository_name))
return _get_repo_tag_image(tag_name, include_storage, modifier)
def list_repository_tag_history(repo_obj, page=1, size=100, specific_tag=None):
query = (RepositoryTag
.select(RepositoryTag, Image)
.join(Image)
.switch(RepositoryTag)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.hidden == False)
.order_by(RepositoryTag.lifetime_start_ts.desc(), RepositoryTag.name)
.limit(size + 1)
.offset(size * (page - 1)))
if specific_tag:
query = query.where(RepositoryTag.name == specific_tag)
tags = list(query)
if not tags:
return [], {}, False
manifest_map = get_tag_manifest_digests(tags)
return tags[0:size], manifest_map, len(tags) > size
def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest):
""" Restores a tag to a specific manifest digest. """
with db_transaction():
# Verify that the manifest digest already existed under this repository under the
# tag.
try:
manifest = (TagManifest
.select(TagManifest, RepositoryTag, Image)
.join(RepositoryTag)
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.name == tag_name)
.where(TagManifest.digest == manifest_digest)
.get())
except TagManifest.DoesNotExist:
raise DataModelException('Cannot restore to unknown or invalid digest')
# Lookup the existing image, if any.
try:
existing_image = get_repo_tag_image(repo_obj, tag_name)
except DataModelException:
existing_image = None
docker_image_id = manifest.tag.image.docker_image_id
store_tag_manifest(repo_obj.namespace_user.username, repo_obj.name, tag_name, docker_image_id,
manifest_digest, manifest.json_data, reversion=True)
return existing_image
def restore_tag_to_image(repo_obj, tag_name, docker_image_id):
""" Restores a tag to a specific image ID. """
with db_transaction():
# Verify that the image ID already existed under this repository under the
# tag.
try:
(RepositoryTag
.select()
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.name == tag_name)
.where(Image.docker_image_id == docker_image_id)
.get())
except RepositoryTag.DoesNotExist:
raise DataModelException('Cannot restore to unknown or invalid image')
# Lookup the existing image, if any.
try:
existing_image = get_repo_tag_image(repo_obj, tag_name)
except DataModelException:
existing_image = None
create_or_update_tag(repo_obj.namespace_user.username, repo_obj.name, tag_name,
docker_image_id, reversion=True)
return existing_image
def store_tag_manifest(namespace, repo_name, tag_name, docker_image_id, manifest_digest,
manifest_data, reversion=False):
""" Stores a tag manifest for a specific tag name in the database. Returns the TagManifest
object, as well as a boolean indicating whether the TagManifest was created.
"""
with db_transaction():
tag = create_or_update_tag(namespace, repo_name, tag_name, docker_image_id, reversion=reversion)
try:
manifest = TagManifest.get(digest=manifest_digest)
manifest.tag = tag
manifest.save()
return manifest, False
except TagManifest.DoesNotExist:
return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data), True
def get_active_tag(namespace, repo_name, tag_name):
return _tag_alive(RepositoryTag
.select()
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(RepositoryTag.name == tag_name, Repository.name == repo_name,
Namespace.username == namespace)).get()
def get_possibly_expired_tag(namespace, repo_name, tag_name):
return (RepositoryTag
.select()
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(RepositoryTag.name == tag_name, Repository.name == repo_name,
Namespace.username == namespace)).get()
def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest_digest,
manifest_data):
tag = get_active_tag(namespace, repo_name, tag_name)
return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data)
def load_tag_manifest(namespace, repo_name, tag_name):
try:
return (_load_repo_manifests(namespace, repo_name)
.where(RepositoryTag.name == tag_name)
.get())
except TagManifest.DoesNotExist:
msg = 'Manifest not found for tag {0} in repo {1}/{2}'.format(tag_name, namespace, repo_name)
raise InvalidManifestException(msg)
def delete_manifest_by_digest(namespace, repo_name, digest):
tag_manifests = list(_load_repo_manifests(namespace, repo_name)
.where(TagManifest.digest == digest))
for tag_manifest in tag_manifests:
delete_tag(namespace, repo_name, tag_manifest.tag.name)
return [tag_manifest.tag for tag_manifest in tag_manifests]
def load_manifest_by_digest(namespace, repo_name, digest):
try:
return (_load_repo_manifests(namespace, repo_name)
.where(TagManifest.digest == digest)
.get())
except TagManifest.DoesNotExist:
msg = 'Manifest not found with digest {0} in repo {1}/{2}'.format(digest, namespace, repo_name)
raise InvalidManifestException(msg)
def _load_repo_manifests(namespace, repo_name):
return _tag_alive(TagManifest
.select(TagManifest, RepositoryTag)
.join(RepositoryTag)
.join(Image)
.join(Repository)
.join(Namespace, on=(Namespace.id == Repository.namespace_user))
.where(Repository.name == repo_name, Namespace.username == namespace))
def change_repository_tag_expiration(namespace_name, repo_name, tag_name, expiration_date):
""" Changes the expiration of the tag with the given name to the given expiration datetime. If
the expiration datetime is None, then the tag is marked as not expiring.
"""
try:
tag = get_active_tag(namespace_name, repo_name, tag_name)
return change_tag_expiration(tag, expiration_date)
except RepositoryTag.DoesNotExist:
return (None, False)
def change_tag_expiration(tag, expiration_date):
""" Changes the expiration of the given tag to the given expiration datetime. If
the expiration datetime is None, then the tag is marked as not expiring.
"""
end_ts = None
min_expire_sec = convert_to_timedelta(config.app_config.get('LABELED_EXPIRATION_MINIMUM', '1h'))
max_expire_sec = convert_to_timedelta(config.app_config.get('LABELED_EXPIRATION_MAXIMUM', '104w'))
if expiration_date is not None:
offset = timegm(expiration_date.utctimetuple()) - tag.lifetime_start_ts
offset = min(max(offset, min_expire_sec.total_seconds()), max_expire_sec.total_seconds())
end_ts = tag.lifetime_start_ts + offset
if end_ts == tag.lifetime_end_ts:
return (None, True)
# Note: We check not just the ID of the tag but also its lifetime_end_ts, to ensure that it has
# not changed while we were updatings it expiration.
result = (RepositoryTag
.update(lifetime_end_ts=end_ts)
.where(RepositoryTag.id == tag.id,
RepositoryTag.lifetime_end_ts == tag.lifetime_end_ts)
.execute())
return (tag.lifetime_end_ts, result > 0)