initial import for Open Source 🎉

Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

data/model/oci/__init__.py Normal file
@@ -0,0 +1,9 @@
# There MUST NOT be any circular dependencies between these subsections. If there are, fix it by
# moving the minimal number of things to shared.
from data.model.oci import (
blob,
label,
manifest,
shared,
tag,
)

data/model/oci/blob.py Normal file
@@ -0,0 +1,26 @@
from data.database import ImageStorage, ManifestBlob
from data.model import BlobDoesNotExist
from data.model.storage import get_storage_by_uuid, InvalidImageException
from data.model.blob import get_repository_blob_by_digest as legacy_get
def get_repository_blob_by_digest(repository, blob_digest):
""" Find the content-addressable blob linked to the specified repository and
returns it or None if none.
"""
try:
storage = (ImageStorage
.select(ImageStorage.uuid)
.join(ManifestBlob)
.where(ManifestBlob.repository == repository,
ImageStorage.content_checksum == blob_digest,
ImageStorage.uploading == False)
.get())
return get_storage_by_uuid(storage.uuid)
except (ImageStorage.DoesNotExist, InvalidImageException):
# TODO: Remove once we are no longer using the legacy tables.
# Try the legacy call.
try:
return legacy_get(repository, blob_digest)
except BlobDoesNotExist:
return None
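
A minimal usage sketch for the blob lookup above (illustrative only; assumes an initialized database and the devtable/simple repository provided by the test fixtures):
from data.model.repository import get_repository
from data.model.oci.blob import get_repository_blob_by_digest
repo = get_repository('devtable', 'simple')
# Any digest string may be passed; None is returned when no linked blob exists.
blob = get_repository_blob_by_digest(repo, 'sha256:' + '0' * 64)
if blob is not None:
    print(blob.content_checksum)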

data/model/oci/label.py Normal file
@@ -0,0 +1,142 @@
import logging
from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException
from data.database import (Label, Manifest, TagManifestLabel, MediaType, LabelSourceType,
db_transaction, ManifestLabel, TagManifestLabelMap,
TagManifestToManifest, Repository, TagManifest)
from data.text import prefix_search
from util.validation import validate_label_key
from util.validation import is_json
logger = logging.getLogger(__name__)
def list_manifest_labels(manifest_id, prefix_filter=None):
""" Lists all labels found on the given manifest, with an optional filter by key prefix. """
query = (Label
.select(Label, MediaType)
.join(MediaType)
.switch(Label)
.join(LabelSourceType)
.switch(Label)
.join(ManifestLabel)
.where(ManifestLabel.manifest == manifest_id))
if prefix_filter is not None:
query = query.where(prefix_search(Label.key, prefix_filter))
return query
def get_manifest_label(label_uuid, manifest):
""" Retrieves the manifest label on the manifest with the given UUID or None if none. """
try:
return (Label
.select(Label, LabelSourceType)
.join(LabelSourceType)
.where(Label.uuid == label_uuid)
.switch(Label)
.join(ManifestLabel)
.where(ManifestLabel.manifest == manifest)
.get())
except Label.DoesNotExist:
return None
def create_manifest_label(manifest_id, key, value, source_type_name, media_type_name=None,
adjust_old_model=True):
""" Creates a new manifest label on a specific tag manifest. """
if not key:
raise InvalidLabelKeyException()
# Note that we don't prevent invalid label names coming from the manifest from being stored, as
# Docker does not currently prevent them from being put into said manifests.
if not validate_label_key(key) and source_type_name != 'manifest':
raise InvalidLabelKeyException('Key `%s` is invalid' % key)
# Find the matching media type. If none specified, we infer.
if media_type_name is None:
media_type_name = 'text/plain'
if is_json(value):
media_type_name = 'application/json'
try:
media_type_id = Label.media_type.get_id(media_type_name)
except MediaType.DoesNotExist:
raise InvalidMediaTypeException()
source_type_id = Label.source_type.get_id(source_type_name)
# Ensure the manifest exists.
try:
manifest = (Manifest
.select(Manifest, Repository)
.join(Repository)
.where(Manifest.id == manifest_id)
.get())
except Manifest.DoesNotExist:
return None
repository = manifest.repository
# TODO: Remove this code once the TagManifest table is gone.
tag_manifest = None
if adjust_old_model:
try:
mapping_row = (TagManifestToManifest
.select(TagManifestToManifest, TagManifest)
.join(TagManifest)
.where(TagManifestToManifest.manifest == manifest)
.get())
tag_manifest = mapping_row.tag_manifest
except TagManifestToManifest.DoesNotExist:
tag_manifest = None
with db_transaction():
label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id)
manifest_label = ManifestLabel.create(manifest=manifest_id, label=label, repository=repository)
# If there exists a mapping to a TagManifest, add the old-style label.
# TODO: Remove this code once the TagManifest table is gone.
if tag_manifest:
tag_manifest_label = TagManifestLabel.create(annotated=tag_manifest, label=label,
repository=repository)
TagManifestLabelMap.create(manifest_label=manifest_label,
tag_manifest_label=tag_manifest_label,
label=label,
manifest=manifest,
tag_manifest=tag_manifest)
return label
def delete_manifest_label(label_uuid, manifest):
""" Deletes the manifest label on the tag manifest with the given ID. Returns the label deleted
or None if none.
"""
# Find the label itself.
label = get_manifest_label(label_uuid, manifest)
if label is None:
return None
if not label.source_type.mutable:
raise DataModelException('Cannot delete immutable label')
# Delete the mapping records and label.
# TODO: Remove this code once the TagManifest table is gone.
with db_transaction():
(TagManifestLabelMap
.delete()
.where(TagManifestLabelMap.label == label)
.execute())
deleted_count = TagManifestLabel.delete().where(TagManifestLabel.label == label).execute()
if deleted_count != 1:
logger.warning('More than a single label deleted for matching label %s', label_uuid)
deleted_count = ManifestLabel.delete().where(ManifestLabel.label == label).execute()
if deleted_count != 1:
logger.warning('More than a single label deleted for matching label %s', label_uuid)
label.delete_instance(recursive=False)
return label
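
A minimal usage sketch for the label helpers above (illustrative only; assumes an initialized database and that the 'api' label source type is mutable):
from data.database import Manifest
from data.model.oci.label import (create_manifest_label, list_manifest_labels,
                                  delete_manifest_label)
manifest = Manifest.get()  # any existing manifest row
label = create_manifest_label(manifest, 'com.example.key', 'some value', 'api')
assert label in list_manifest_labels(manifest, prefix_filter='com.example')
# Deleting by UUID also removes the old-style TagManifestLabel mapping rows.
delete_manifest_label(label.uuid, manifest)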

data/model/oci/manifest.py Normal file
@@ -0,0 +1,321 @@
import logging
from collections import namedtuple
from peewee import IntegrityError
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
db_transaction)
from data.model import BlobDoesNotExist
from data.model.blob import get_or_create_shared_blob, get_shared_blob
from data.model.oci.tag import filter_to_alive_tags, create_temporary_tag_if_necessary
from data.model.oci.label import create_manifest_label
from data.model.oci.retriever import RepositoryContentRetriever
from data.model.storage import lookup_repo_storages_by_content_checksum
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES
from image.docker.schema1 import ManifestException
from image.docker.schema2.list import MalformedSchema2ManifestList
from util.validation import is_json
TEMP_TAG_EXPIRATION_SEC = 300 # 5 minutes
logger = logging.getLogger(__name__)
CreatedManifest = namedtuple('CreatedManifest', ['manifest', 'newly_created', 'labels_to_apply'])
class CreateManifestException(Exception):
""" Exception raised when creating a manifest fails and explicit exception
raising is requested. """
def lookup_manifest(repository_id, manifest_digest, allow_dead=False, require_available=False,
temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC):
""" Returns the manifest with the specified digest under the specified repository
or None if none. If allow_dead is True, then manifests referenced by only
dead tags will also be returned. If require_available is True, the manifest
will be marked with a temporary tag to ensure it remains available.
"""
if not require_available:
return _lookup_manifest(repository_id, manifest_digest, allow_dead=allow_dead)
with db_transaction():
found = _lookup_manifest(repository_id, manifest_digest, allow_dead=allow_dead)
if found is None:
return None
create_temporary_tag_if_necessary(found, temp_tag_expiration_sec)
return found
def _lookup_manifest(repository_id, manifest_digest, allow_dead=False):
query = (Manifest
.select()
.where(Manifest.repository == repository_id)
.where(Manifest.digest == manifest_digest))
if allow_dead:
try:
return query.get()
except Manifest.DoesNotExist:
return None
# Try first to filter to those manifests referenced by an alive tag.
try:
return filter_to_alive_tags(query.join(Tag)).get()
except Manifest.DoesNotExist:
pass
# Try referenced as the child of a manifest that has an alive tag.
query = (query
.join(ManifestChild, on=(ManifestChild.child_manifest == Manifest.id))
.join(Tag, on=(Tag.manifest == ManifestChild.manifest)))
query = filter_to_alive_tags(query)
try:
return query.get()
except Manifest.DoesNotExist:
return None
def get_or_create_manifest(repository_id, manifest_interface_instance, storage,
temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
for_tagging=False, raise_on_error=False):
""" Returns a CreatedManifest for the manifest in the specified repository with the matching
digest (if it already exists) or, if not yet created, creates and returns the manifest.
Returns None if there was an error creating the manifest, unless raise_on_error is specified,
in which case a CreateManifestException exception will be raised instead to provide more
context to the error.
Note that *all* blobs referenced by the manifest must exist already in the repository or this
method will fail and return None.
"""
existing = lookup_manifest(repository_id, manifest_interface_instance.digest, allow_dead=True,
require_available=True,
temp_tag_expiration_sec=temp_tag_expiration_sec)
if existing is not None:
return CreatedManifest(manifest=existing, newly_created=False, labels_to_apply=None)
return _create_manifest(repository_id, manifest_interface_instance, storage,
temp_tag_expiration_sec, for_tagging=for_tagging,
raise_on_error=raise_on_error)
def _create_manifest(repository_id, manifest_interface_instance, storage,
temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
for_tagging=False, raise_on_error=False):
# Validate the manifest.
retriever = RepositoryContentRetriever.for_repository(repository_id, storage)
try:
manifest_interface_instance.validate(retriever)
except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError) as ex:
logger.exception('Could not validate manifest `%s`', manifest_interface_instance.digest)
if raise_on_error:
raise CreateManifestException(ex)
return None
# Load, parse and get/create the child manifests, if any.
child_manifest_refs = manifest_interface_instance.child_manifests(retriever)
child_manifest_rows = {}
child_manifest_label_dicts = []
if child_manifest_refs is not None:
for child_manifest_ref in child_manifest_refs:
# Load and parse the child manifest.
try:
child_manifest = child_manifest_ref.manifest_obj
except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError) as ex:
logger.exception('Could not load manifest list for manifest `%s`',
manifest_interface_instance.digest)
if raise_on_error:
raise CreateManifestException(ex)
return None
# Retrieve its labels.
labels = child_manifest.get_manifest_labels(retriever)
if labels is None:
logger.exception('Could not load manifest labels for child manifest')
return None
# Get/create the child manifest in the database.
child_manifest_info = get_or_create_manifest(repository_id, child_manifest, storage,
raise_on_error=raise_on_error)
if child_manifest_info is None:
logger.error('Could not get/create child manifest')
return None
child_manifest_rows[child_manifest_info.manifest.digest] = child_manifest_info.manifest
child_manifest_label_dicts.append(labels)
# Ensure all the blobs in the manifest exist.
digests = set(manifest_interface_instance.local_blob_digests)
blob_map = {}
# If the special empty layer is required, simply load it directly. This is much faster
# than trying to load it on a per repository basis, and that is unnecessary anyway since
# this layer is predefined.
if EMPTY_LAYER_BLOB_DIGEST in digests:
digests.remove(EMPTY_LAYER_BLOB_DIGEST)
blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
logger.warning('Could not find the special empty blob in storage')
return None
if digests:
query = lookup_repo_storages_by_content_checksum(repository_id, digests)
blob_map.update({s.content_checksum: s for s in query})
for digest_str in digests:
if digest_str not in blob_map:
logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str,
manifest_interface_instance.digest, repository_id)
if raise_on_error:
raise CreateManifestException('Unknown blob `%s`' % digest_str)
return None
# Special check: If the empty layer blob is needed for this manifest, add it to the
# blob map. This is necessary because Docker decided to elide sending of this special
# empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
assert not shared_blob.uploading
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
# Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
# image.
legacy_image = None
if manifest_interface_instance.has_legacy_image:
legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance, blob_map,
retriever)
if legacy_image_id is None:
return None
legacy_image = get_image(repository_id, legacy_image_id)
if legacy_image is None:
return None
# Create the manifest and its blobs.
media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type)
storage_ids = {storage.id for storage in blob_map.values()}
with db_transaction():
# Check for the manifest. This is necessary because Postgres doesn't handle IntegrityErrors
# well under transactions.
try:
manifest = Manifest.get(repository=repository_id, digest=manifest_interface_instance.digest)
return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)
except Manifest.DoesNotExist:
pass
# Create the manifest.
try:
manifest = Manifest.create(repository=repository_id,
digest=manifest_interface_instance.digest,
media_type=media_type,
manifest_bytes=manifest_interface_instance.bytes.as_encoded_str())
except IntegrityError:
manifest = Manifest.get(repository=repository_id, digest=manifest_interface_instance.digest)
return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)
# Insert the blobs.
blobs_to_insert = [dict(manifest=manifest, repository=repository_id,
blob=storage_id) for storage_id in storage_ids]
if blobs_to_insert:
ManifestBlob.insert_many(blobs_to_insert).execute()
# Set the legacy image (if applicable).
if legacy_image is not None:
ManifestLegacyImage.create(repository=repository_id, image=legacy_image, manifest=manifest)
# Insert the manifest child rows (if applicable).
if child_manifest_rows:
children_to_insert = [dict(manifest=manifest, child_manifest=child_manifest,
repository=repository_id)
for child_manifest in child_manifest_rows.values()]
ManifestChild.insert_many(children_to_insert).execute()
# If this manifest is not being created for immediate tagging, add a temporary tag to the
# manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
# creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
# it's safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
# in a repository for GC, then we will have to reevaluate this optimization at that time.
if not for_tagging:
create_temporary_tag_if_necessary(manifest, temp_tag_expiration_sec)
# Define the labels for the manifest (if any).
labels = manifest_interface_instance.get_manifest_labels(retriever)
if labels:
for key, value in labels.iteritems():
media_type = 'application/json' if is_json(value) else 'text/plain'
create_manifest_label(manifest, key, value, 'manifest', media_type)
# Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
# on the manifest or its resulting tags). We only return those labels either defined on
# the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
# to ensure that any action performed is defined in all manifests.
labels_to_apply = labels or {}
if child_manifest_label_dicts:
labels_to_apply = child_manifest_label_dicts[0].viewitems()
for child_manifest_label_dict in child_manifest_label_dicts[1:]:
# Intersect the key+values of the labels to ensure we get the exact same result
# for all the child manifests.
labels_to_apply = labels_to_apply & child_manifest_label_dict.viewitems()
labels_to_apply = dict(labels_to_apply)
return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply)
def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, retriever):
# Lookup all the images and their parent images (if any) inside the manifest.
# This will let us know which v1 images we need to synthesize and which ones are invalid.
docker_image_ids = list(manifest_interface_instance.get_legacy_image_ids(retriever))
images_query = lookup_repository_images(repository_id, docker_image_ids)
image_storage_map = {i.docker_image_id: i.storage for i in images_query}
# Rewrite any v1 image IDs that do not match the checksum in the database.
try:
rewritten_images = manifest_interface_instance.generate_legacy_layers(image_storage_map,
retriever)
rewritten_images = list(rewritten_images)
parent_image_map = {}
for rewritten_image in rewritten_images:
if not rewritten_image.image_id in image_storage_map:
parent_image = None
if rewritten_image.parent_image_id:
parent_image = parent_image_map.get(rewritten_image.parent_image_id)
if parent_image is None:
parent_image = get_image(repository_id, rewritten_image.parent_image_id)
if parent_image is None:
return None
storage_reference = blob_map[rewritten_image.content_checksum]
synthesized = synthesize_v1_image(
repository_id,
storage_reference.id,
storage_reference.image_size,
rewritten_image.image_id,
rewritten_image.created,
rewritten_image.comment,
rewritten_image.command,
rewritten_image.compat_json,
parent_image,
)
parent_image_map[rewritten_image.image_id] = synthesized
except ManifestException:
logger.exception("exception when rewriting v1 metadata")
return None
return rewritten_images[-1].image_id
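
A small worked example of the labels_to_apply intersection performed above (plain Python 2 dict views, no database required; the label keys are made up):
child_manifest_label_dicts = [
    {'quay.expires-after': '2w', 'arch': 'amd64'},
    {'quay.expires-after': '2w', 'arch': 'arm64'},
]
labels_to_apply = child_manifest_label_dicts[0].viewitems()
for child_manifest_label_dict in child_manifest_label_dicts[1:]:
    labels_to_apply = labels_to_apply & child_manifest_label_dict.viewitems()
print(dict(labels_to_apply))  # only the shared pair survives: {'quay.expires-after': '2w'}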

data/model/oci/retriever.py Normal file
@@ -0,0 +1,37 @@
from image.docker.interfaces import ContentRetriever
from data.database import Manifest
from data.model.oci.blob import get_repository_blob_by_digest
from data.model.storage import get_layer_path
class RepositoryContentRetriever(ContentRetriever):
""" Implementation of the ContentRetriever interface for manifests that retrieves
config blobs and child manifests for the specified repository.
"""
def __init__(self, repository_id, storage):
self.repository_id = repository_id
self.storage = storage
@classmethod
def for_repository(cls, repository_id, storage):
return RepositoryContentRetriever(repository_id, storage)
def get_manifest_bytes_with_digest(self, digest):
""" Returns the bytes of the manifest with the given digest or None if none found. """
query = (Manifest
.select()
.where(Manifest.repository == self.repository_id)
.where(Manifest.digest == digest))
try:
return query.get().manifest_bytes
except Manifest.DoesNotExist:
return None
def get_blob_bytes_with_digest(self, digest):
""" Returns the bytes of the blob with the given digest or None if none found. """
blob = get_repository_blob_by_digest(self.repository_id, digest)
if blob is None:
return None
assert blob.locations is not None
return self.storage.get_content(blob.locations, get_layer_path(blob))
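
A minimal usage sketch for RepositoryContentRetriever (illustrative only; assumes an initialized database, the devtable/simple fixture repository, and the application storage driver imported from app):
from app import storage
from data.model.repository import get_repository
from data.model.oci.retriever import RepositoryContentRetriever
repo = get_repository('devtable', 'simple')
retriever = RepositoryContentRetriever.for_repository(repo.id, storage)
# Returns the raw manifest bytes for the digest, or None if no such manifest exists.
manifest_bytes = retriever.get_manifest_bytes_with_digest('sha256:' + '0' * 64)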

data/model/oci/shared.py Normal file
@@ -0,0 +1,24 @@
from data.database import Manifest, ManifestLegacyImage, Image
def get_legacy_image_for_manifest(manifest_id):
""" Returns the legacy image associated with the given manifest, if any, or None if none. """
try:
query = (ManifestLegacyImage
.select(ManifestLegacyImage, Image)
.join(Image)
.where(ManifestLegacyImage.manifest == manifest_id))
return query.get().image
except ManifestLegacyImage.DoesNotExist:
return None
def get_manifest_for_legacy_image(image_id):
""" Returns a manifest that is associated with the given image, if any, or None if none. """
try:
query = (ManifestLegacyImage
.select(ManifestLegacyImage, Manifest)
.join(Manifest)
.where(ManifestLegacyImage.image == image_id))
return query.get().manifest
except ManifestLegacyImage.DoesNotExist:
return None
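
A minimal usage sketch for the manifest/legacy-image linkage helpers above (illustrative only; assumes an initialized database):
from data.database import Manifest
from data.model.oci.shared import (get_legacy_image_for_manifest,
                                   get_manifest_for_legacy_image)
manifest = Manifest.get()  # any existing manifest row
# Manifest lists have no legacy image, so this can be None.
image = get_legacy_image_for_manifest(manifest)
if image is not None:
    linked = get_manifest_for_legacy_image(image.id)
    print(linked.digest if linked is not None else None)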

data/model/oci/tag.py Normal file
@@ -0,0 +1,505 @@
import uuid
import logging
from calendar import timegm
from peewee import fn
from data.database import (Tag, Manifest, ManifestLegacyImage, Image, ImageStorage,
MediaType, RepositoryTag, TagManifest, TagManifestToManifest,
get_epoch_timestamp_ms, db_transaction, Repository,
TagToRepositoryTag, Namespace, RepositoryNotification,
ExternalNotificationEvent)
from data.model.oci.shared import get_legacy_image_for_manifest
from data.model import config
from image.docker.schema1 import (DOCKER_SCHEMA1_CONTENT_TYPES, DockerSchema1Manifest,
MalformedSchema1Manifest)
from util.bytes import Bytes
from util.timedeltastring import convert_to_timedelta
logger = logging.getLogger(__name__)
def get_tag_by_id(tag_id):
""" Returns the tag with the given ID, joined with its manifest or None if none. """
try:
return Tag.select(Tag, Manifest).join(Manifest).where(Tag.id == tag_id).get()
except Tag.DoesNotExist:
return None
def get_tag(repository_id, tag_name):
""" Returns the alive, non-hidden tag with the given name under the specified repository or
None if none. The tag is returned joined with its manifest.
"""
query = (Tag
.select(Tag, Manifest)
.join(Manifest)
.where(Tag.repository == repository_id)
.where(Tag.name == tag_name))
query = filter_to_alive_tags(query)
try:
found = query.get()
assert not found.hidden
return found
except Tag.DoesNotExist:
return None
def lookup_alive_tags_shallow(repository_id, start_pagination_id=None, limit=None):
""" Returns a list of the tags alive in the specified repository. Note that the tags returned
*only* contain their ID and name. Also note that the Tags are returned ordered by ID.
"""
query = (Tag
.select(Tag.id, Tag.name)
.where(Tag.repository == repository_id)
.order_by(Tag.id))
if start_pagination_id is not None:
query = query.where(Tag.id >= start_pagination_id)
if limit is not None:
query = query.limit(limit)
return filter_to_alive_tags(query)
def list_alive_tags(repository_id):
""" Returns a list of all the tags alive in the specified repository.
Tags returned are joined with their manifest.
"""
query = (Tag
.select(Tag, Manifest)
.join(Manifest)
.where(Tag.repository == repository_id))
return filter_to_alive_tags(query)
def list_repository_tag_history(repository_id, page, page_size, specific_tag_name=None,
active_tags_only=False, since_time_ms=None):
""" Returns a tuple of the full set of tags found in the specified repository, including those
that are no longer alive (unless active_tags_only is True), and whether additional tags exist.
If specific_tag_name is given, the tags are further filtered by name. If since_time_ms is given,
tags are further filtered to those newer than that time.
Note that the returned Manifest will not contain the manifest contents.
"""
query = (Tag
.select(Tag, Manifest.id, Manifest.digest, Manifest.media_type)
.join(Manifest)
.where(Tag.repository == repository_id)
.order_by(Tag.lifetime_start_ms.desc(), Tag.name)
.limit(page_size + 1)
.offset(page_size * (page - 1)))
if specific_tag_name is not None:
query = query.where(Tag.name == specific_tag_name)
if since_time_ms is not None:
query = query.where((Tag.lifetime_start_ms > since_time_ms) | (Tag.lifetime_end_ms > since_time_ms))
if active_tags_only:
query = filter_to_alive_tags(query)
query = filter_to_visible_tags(query)
results = list(query)
return results[0:page_size], len(results) > page_size
def get_legacy_images_for_tags(tags):
""" Returns a map from tag ID to the legacy image for the tag. """
if not tags:
return {}
query = (ManifestLegacyImage
.select(ManifestLegacyImage, Image, ImageStorage)
.join(Image)
.join(ImageStorage)
.where(ManifestLegacyImage.manifest << [tag.manifest_id for tag in tags]))
by_manifest = {mli.manifest_id: mli.image for mli in query}
return {tag.id: by_manifest[tag.manifest_id] for tag in tags if tag.manifest_id in by_manifest}
def find_matching_tag(repository_id, tag_names, tag_kinds=None):
""" Finds an alive tag in the specified repository with one of the specified tag names and
returns it or None if none. Tags returned are joined with their manifest.
"""
assert repository_id
assert tag_names
query = (Tag
.select(Tag, Manifest)
.join(Manifest)
.where(Tag.repository == repository_id)
.where(Tag.name << tag_names))
if tag_kinds:
query = query.where(Tag.tag_kind << tag_kinds)
try:
found = filter_to_alive_tags(query).get()
assert not found.hidden
return found
except Tag.DoesNotExist:
return None
def get_most_recent_tag_lifetime_start(repository_ids):
""" Returns a map from repo ID to the timestamp of the most recently pushed alive tag
for each specified repository or None if none.
"""
assert len(repository_ids) > 0 and None not in repository_ids
query = (Tag.select(Tag.repository, fn.Max(Tag.lifetime_start_ms))
.where(Tag.repository << [repo_id for repo_id in repository_ids])
.group_by(Tag.repository))
tuples = filter_to_alive_tags(query).tuples()
return {repo_id: timestamp for repo_id, timestamp in tuples}
def get_most_recent_tag(repository_id):
""" Returns the most recently pushed alive tag in the specified repository or None if none.
The Tag returned is joined with its manifest.
"""
assert repository_id
query = (Tag
.select(Tag, Manifest)
.join(Manifest)
.where(Tag.repository == repository_id)
.order_by(Tag.lifetime_start_ms.desc()))
try:
found = filter_to_alive_tags(query).get()
assert not found.hidden
return found
except Tag.DoesNotExist:
return None
def get_expired_tag(repository_id, tag_name):
""" Returns a tag with the given name that is expired in the repository or None if none.
"""
try:
return (Tag
.select()
.where(Tag.name == tag_name, Tag.repository == repository_id)
.where(~(Tag.lifetime_end_ms >> None))
.where(Tag.lifetime_end_ms <= get_epoch_timestamp_ms())
.get())
except Tag.DoesNotExist:
return None
def create_temporary_tag_if_necessary(manifest, expiration_sec):
""" Creates a temporary tag pointing to the given manifest, with the given expiration in seconds,
unless there is an existing tag that will keep the manifest around.
"""
tag_name = '$temp-%s' % str(uuid.uuid4())
now_ms = get_epoch_timestamp_ms()
end_ms = now_ms + (expiration_sec * 1000)
# Check if there is an existing tag on the manifest that won't expire within the
# timeframe. If so, no need for a temporary tag.
with db_transaction():
try:
(Tag
.select()
.where(Tag.manifest == manifest,
(Tag.lifetime_end_ms >> None) | (Tag.lifetime_end_ms >= end_ms))
.get())
return None
except Tag.DoesNotExist:
pass
return Tag.create(name=tag_name,
repository=manifest.repository_id,
lifetime_start_ms=now_ms,
lifetime_end_ms=end_ms,
reversion=False,
hidden=True,
manifest=manifest,
tag_kind=Tag.tag_kind.get_id('tag'))
def retarget_tag(tag_name, manifest_id, is_reversion=False, now_ms=None, adjust_old_model=True):
""" Creates or updates a tag with the specified name to point to the given manifest under
its repository. If this action is a reversion to a previous manifest, is_reversion
should be set to True. Returns the newly created tag row or None on error.
"""
try:
manifest = (Manifest
.select(Manifest, MediaType)
.join(MediaType)
.where(Manifest.id == manifest_id)
.get())
except Manifest.DoesNotExist:
return None
# CHECK: Make sure that we are not mistargeting a schema 1 manifest to a tag with a different
# name.
if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES:
try:
parsed = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest.manifest_bytes),
validate=False)
if parsed.tag != tag_name:
logger.error('Tried to re-target schema1 manifest with tag `%s` to tag `%s`', parsed.tag,
tag_name)
return None
except MalformedSchema1Manifest:
logger.exception('Could not parse schema1 manifest')
return None
legacy_image = get_legacy_image_for_manifest(manifest)
now_ms = now_ms or get_epoch_timestamp_ms()
now_ts = int(now_ms / 1000)
with db_transaction():
# Lookup an existing tag in the repository with the same name and, if present, mark it
# as expired.
existing_tag = get_tag(manifest.repository_id, tag_name)
if existing_tag is not None:
_, okay = set_tag_end_ms(existing_tag, now_ms)
# TODO: should we retry here and/or use a for-update?
if not okay:
return None
# Create a new tag pointing to the manifest with a lifetime start of now.
created = Tag.create(name=tag_name, repository=manifest.repository_id, lifetime_start_ms=now_ms,
reversion=is_reversion, manifest=manifest,
tag_kind=Tag.tag_kind.get_id('tag'))
# TODO: Remove the linkage code once RepositoryTag is gone.
# If this is a schema 1 manifest, then add a TagManifest linkage to it. Otherwise, it will only
# be pullable via the new OCI model.
if adjust_old_model:
if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES and legacy_image is not None:
old_style_tag = RepositoryTag.create(repository=manifest.repository_id, image=legacy_image,
name=tag_name, lifetime_start_ts=now_ts,
reversion=is_reversion)
TagToRepositoryTag.create(tag=created, repository_tag=old_style_tag,
repository=manifest.repository_id)
tag_manifest = TagManifest.create(tag=old_style_tag, digest=manifest.digest,
json_data=manifest.manifest_bytes)
TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest,
repository=manifest.repository_id)
return created
def delete_tag(repository_id, tag_name):
""" Deletes the alive tag with the given name in the specified repository and returns the deleted
tag. If the tag did not exist, returns None.
"""
tag = get_tag(repository_id, tag_name)
if tag is None:
return None
return _delete_tag(tag, get_epoch_timestamp_ms())
def _delete_tag(tag, now_ms):
""" Deletes the given tag by marking it as expired. """
now_ts = int(now_ms / 1000)
with db_transaction():
updated = (Tag
.update(lifetime_end_ms=now_ms)
.where(Tag.id == tag.id, Tag.lifetime_end_ms == tag.lifetime_end_ms)
.execute())
if updated != 1:
return None
# TODO: Remove the linkage code once RepositoryTag is gone.
try:
old_style_tag = (TagToRepositoryTag
.select(TagToRepositoryTag, RepositoryTag)
.join(RepositoryTag)
.where(TagToRepositoryTag.tag == tag)
.get()).repository_tag
old_style_tag.lifetime_end_ts = now_ts
old_style_tag.save()
except TagToRepositoryTag.DoesNotExist:
pass
return tag
def delete_tags_for_manifest(manifest):
""" Deletes all tags pointing to the given manifest. Returns the list of tags
deleted.
"""
query = Tag.select().where(Tag.manifest == manifest)
query = filter_to_alive_tags(query)
query = filter_to_visible_tags(query)
tags = list(query)
now_ms = get_epoch_timestamp_ms()
with db_transaction():
for tag in tags:
_delete_tag(tag, now_ms)
return tags
def filter_to_visible_tags(query):
""" Adjusts the specified Tag query to only return those tags that are visible.
"""
return query.where(Tag.hidden == False)
def filter_to_alive_tags(query, now_ms=None, model=Tag):
""" Adjusts the specified Tag query to only return those tags alive. If now_ms is specified,
the given timestamp (in MS) is used in place of the current timestamp for determining wherther
a tag is alive.
"""
if now_ms is None:
now_ms = get_epoch_timestamp_ms()
return (query.where((model.lifetime_end_ms >> None) | (model.lifetime_end_ms > now_ms))
.where(model.hidden == False))
def set_tag_expiration_sec_for_manifest(manifest_id, expiration_seconds):
""" Sets the tag expiration for any tags that point to the given manifest ID. """
query = Tag.select().where(Tag.manifest == manifest_id)
query = filter_to_alive_tags(query)
tags = list(query)
for tag in tags:
assert not tag.hidden
set_tag_end_ms(tag, tag.lifetime_start_ms + (expiration_seconds * 1000))
return tags
def set_tag_expiration_for_manifest(manifest_id, expiration_datetime):
""" Sets the tag expiration for any tags that point to the given manifest ID. """
query = Tag.select().where(Tag.manifest == manifest_id)
query = filter_to_alive_tags(query)
tags = list(query)
for tag in tags:
assert not tag.hidden
change_tag_expiration(tag, expiration_datetime)
return tags
def change_tag_expiration(tag_id, expiration_datetime):
""" Changes the expiration of the specified tag to the given expiration datetime. If
the expiration datetime is None, then the tag is marked as not expiring. Returns
a tuple of the previous expiration timestamp in seconds (if any), and whether the
operation succeeded.
"""
try:
tag = Tag.get(id=tag_id)
except Tag.DoesNotExist:
return (None, False)
new_end_ms = None
min_expire_sec = convert_to_timedelta(config.app_config.get('LABELED_EXPIRATION_MINIMUM', '1h'))
max_expire_sec = convert_to_timedelta(config.app_config.get('LABELED_EXPIRATION_MAXIMUM', '104w'))
if expiration_datetime is not None:
lifetime_start_ts = int(tag.lifetime_start_ms / 1000)
offset = timegm(expiration_datetime.utctimetuple()) - lifetime_start_ts
offset = min(max(offset, min_expire_sec.total_seconds()), max_expire_sec.total_seconds())
new_end_ms = tag.lifetime_start_ms + (offset * 1000)
if new_end_ms == tag.lifetime_end_ms:
return (None, True)
return set_tag_end_ms(tag, new_end_ms)
def lookup_unrecoverable_tags(repo):
""" Returns the tags in a repository that are expired and past their time machine recovery
period. """
expired_clause = get_epoch_timestamp_ms() - (Namespace.removed_tag_expiration_s * 1000)
return (Tag
.select()
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Tag.repository == repo)
.where(~(Tag.lifetime_end_ms >> None), Tag.lifetime_end_ms <= expired_clause))
def set_tag_end_ms(tag, end_ms):
""" Sets the end timestamp for a tag. Should only be called by change_tag_expiration
or tests.
"""
with db_transaction():
updated = (Tag
.update(lifetime_end_ms=end_ms)
.where(Tag.id == tag)
.where(Tag.lifetime_end_ms == tag.lifetime_end_ms)
.execute())
if updated != 1:
return (None, False)
# TODO: Remove the linkage code once RepositoryTag is gone.
try:
old_style_tag = (TagToRepositoryTag
.select(TagToRepositoryTag, RepositoryTag)
.join(RepositoryTag)
.where(TagToRepositoryTag.tag == tag)
.get()).repository_tag
old_style_tag.lifetime_end_ts = end_ms / 1000 if end_ms is not None else None
old_style_tag.save()
except TagToRepositoryTag.DoesNotExist:
pass
return (tag.lifetime_end_ms, True)
def tags_containing_legacy_image(image):
""" Yields all alive Tags containing the given image as a legacy image, somewhere in its
legacy image hierarchy.
"""
ancestors_str = '%s%s/%%' % (image.ancestors, image.id)
tags = (Tag
.select()
.join(Repository)
.switch(Tag)
.join(Manifest)
.join(ManifestLegacyImage)
.join(Image)
.where(Tag.repository == image.repository_id)
.where(Image.repository == image.repository_id)
.where((Image.id == image.id) |
(Image.ancestors ** ancestors_str)))
return filter_to_alive_tags(tags)
def lookup_notifiable_tags_for_legacy_image(docker_image_id, storage_uuid, event_name):
""" Yields any alive Tags found in repositories with an event with the given name registered
and whose legacy Image has the given docker image ID and storage UUID.
"""
event = ExternalNotificationEvent.get(name=event_name)
images = (Image
.select()
.join(ImageStorage)
.where(Image.docker_image_id == docker_image_id,
ImageStorage.uuid == storage_uuid))
for image in list(images):
# Ensure the image is under a repository that supports the event.
try:
RepositoryNotification.get(repository=image.repository_id, event=event)
except RepositoryNotification.DoesNotExist:
continue
# If found in a repository with the valid event, yield the tag(s) that contain the image.
for tag in tags_containing_legacy_image(image):
yield tag
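
A minimal usage sketch for the tag helpers above (illustrative only; assumes an initialized database; adjust_old_model=False skips the legacy RepositoryTag bookkeeping):
from data.database import Manifest
from data.model.oci.tag import retarget_tag, get_tag, delete_tag
manifest = Manifest.get()  # any existing manifest row
tag = retarget_tag('example-tag', manifest.id, adjust_old_model=False)
# retarget_tag returns None on error, e.g. a schema 1 manifest whose embedded tag
# name differs from the requested name.
if tag is not None:
    assert get_tag(manifest.repository_id, 'example-tag').id == tag.id
    delete_tag(manifest.repository_id, 'example-tag')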

data/model/oci/test/test_oci_label.py Normal file
@@ -0,0 +1,87 @@
import pytest
from playhouse.test_utils import assert_query_count
from data.database import Manifest, ManifestLabel
from data.model.oci.label import (create_manifest_label, list_manifest_labels, get_manifest_label,
delete_manifest_label, DataModelException)
from test.fixtures import *
@pytest.mark.parametrize('key, value, source_type, expected_error', [
('foo', 'bar', 'manifest', None),
pytest.param('..foo', 'bar', 'manifest', None, id='invalid key on manifest'),
pytest.param('..foo', 'bar', 'api', 'is invalid', id='invalid key on api'),
])
def test_create_manifest_label(key, value, source_type, expected_error, initialized_db):
manifest = Manifest.get()
if expected_error:
with pytest.raises(DataModelException) as ex:
create_manifest_label(manifest, key, value, source_type)
assert ex.match(expected_error)
return
label = create_manifest_label(manifest, key, value, source_type)
labels = [ml.label_id for ml in ManifestLabel.select().where(ManifestLabel.manifest == manifest)]
assert label.id in labels
with assert_query_count(1):
assert label in list_manifest_labels(manifest)
assert label not in list_manifest_labels(manifest, 'someprefix')
assert label in list_manifest_labels(manifest, key[0:2])
with assert_query_count(1):
assert get_manifest_label(label.uuid, manifest) == label
def test_list_manifest_labels(initialized_db):
manifest = Manifest.get()
label1 = create_manifest_label(manifest, 'foo', '1', 'manifest')
label2 = create_manifest_label(manifest, 'bar', '2', 'api')
label3 = create_manifest_label(manifest, 'baz', '3', 'internal')
assert label1 in list_manifest_labels(manifest)
assert label2 in list_manifest_labels(manifest)
assert label3 in list_manifest_labels(manifest)
other_manifest = Manifest.select().where(Manifest.id != manifest.id).get()
assert label1 not in list_manifest_labels(other_manifest)
assert label2 not in list_manifest_labels(other_manifest)
assert label3 not in list_manifest_labels(other_manifest)
def test_get_manifest_label(initialized_db):
found = False
for manifest_label in ManifestLabel.select():
assert (get_manifest_label(manifest_label.label.uuid, manifest_label.manifest) ==
manifest_label.label)
assert manifest_label.label in list_manifest_labels(manifest_label.manifest)
found = True
assert found
def test_delete_manifest_label(initialized_db):
found = False
for manifest_label in list(ManifestLabel.select()):
assert (get_manifest_label(manifest_label.label.uuid, manifest_label.manifest) ==
manifest_label.label)
assert manifest_label.label in list_manifest_labels(manifest_label.manifest)
if manifest_label.label.source_type.mutable:
assert delete_manifest_label(manifest_label.label.uuid, manifest_label.manifest)
assert manifest_label.label not in list_manifest_labels(manifest_label.manifest)
assert get_manifest_label(manifest_label.label.uuid, manifest_label.manifest) is None
else:
with pytest.raises(DataModelException):
delete_manifest_label(manifest_label.label.uuid, manifest_label.manifest)
found = True
assert found

data/model/oci/test/test_oci_manifest.py Normal file
@@ -0,0 +1,560 @@
import json
import pytest
from playhouse.test_utils import assert_query_count
from app import docker_v2_signing_key, storage
from digest.digest_tools import sha256_digest
from data.database import (Tag, ManifestBlob, ImageStorageLocation, ManifestChild,
ImageStorage, Image, RepositoryTag, get_epoch_timestamp_ms)
from data.model.oci.manifest import lookup_manifest, get_or_create_manifest
from data.model.oci.tag import filter_to_alive_tags, get_tag
from data.model.oci.shared import get_legacy_image_for_manifest
from data.model.oci.label import list_manifest_labels
from data.model.oci.retriever import RepositoryContentRetriever
from data.model.repository import get_repository, create_repository
from data.model.image import find_create_or_link_image
from data.model.blob import store_blob_record_and_temp_link
from data.model.storage import get_layer_path
from image.docker.schema1 import DockerSchema1ManifestBuilder, DockerSchema1Manifest
from image.docker.schema2.manifest import DockerSchema2ManifestBuilder
from image.docker.schema2.list import DockerSchema2ManifestListBuilder
from util.bytes import Bytes
from test.fixtures import *
def test_lookup_manifest(initialized_db):
found = False
for tag in filter_to_alive_tags(Tag.select()):
found = True
repo = tag.repository
digest = tag.manifest.digest
with assert_query_count(1):
assert lookup_manifest(repo, digest) == tag.manifest
assert found
for tag in Tag.select():
repo = tag.repository
digest = tag.manifest.digest
with assert_query_count(1):
assert lookup_manifest(repo, digest, allow_dead=True) == tag.manifest
def test_lookup_manifest_dead_tag(initialized_db):
dead_tag = Tag.select().where(Tag.lifetime_end_ms <= get_epoch_timestamp_ms()).get()
assert dead_tag.lifetime_end_ms <= get_epoch_timestamp_ms()
assert lookup_manifest(dead_tag.repository, dead_tag.manifest.digest) is None
assert (lookup_manifest(dead_tag.repository, dead_tag.manifest.digest, allow_dead=True) ==
dead_tag.manifest)
def create_manifest_for_testing(repository, differentiation_field='1'):
# Populate a manifest.
layer_json = json.dumps({
'config': {},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [],
})
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
remote_digest = sha256_digest('something')
builder = DockerSchema2ManifestBuilder()
builder.set_config_digest(config_digest, len(layer_json))
builder.add_layer(remote_digest, 1234, urls=['http://hello/world' + differentiation_field])
manifest = builder.build()
created = get_or_create_manifest(repository, manifest, storage)
assert created
return created.manifest, manifest
def test_lookup_manifest_child_tag(initialized_db):
repository = create_repository('devtable', 'newrepo', None)
manifest, manifest_impl = create_manifest_for_testing(repository)
# Mark the hidden tag as dead.
hidden_tag = Tag.get(manifest=manifest, hidden=True)
hidden_tag.lifetime_end_ms = hidden_tag.lifetime_start_ms
hidden_tag.save()
# Ensure the manifest cannot currently be looked up, as it is not pointed to by an alive tag.
assert lookup_manifest(repository, manifest.digest) is None
assert lookup_manifest(repository, manifest.digest, allow_dead=True) is not None
# Populate a manifest list.
list_builder = DockerSchema2ManifestListBuilder()
list_builder.add_manifest(manifest_impl, 'amd64', 'linux')
manifest_list = list_builder.build()
# Write the manifest list, which should also write the manifests themselves.
created_tuple = get_or_create_manifest(repository, manifest_list, storage)
assert created_tuple is not None
# Since the manifests are not yet referenced by a tag, they cannot be found.
assert lookup_manifest(repository, manifest.digest) is None
assert lookup_manifest(repository, manifest_list.digest) is None
# Unless we ask for "dead" manifests.
assert lookup_manifest(repository, manifest.digest, allow_dead=True) is not None
assert lookup_manifest(repository, manifest_list.digest, allow_dead=True) is not None
def _populate_blob(content):
digest = str(sha256_digest(content))
location = ImageStorageLocation.get(name='local_us')
blob = store_blob_record_and_temp_link('devtable', 'newrepo', digest, location,
len(content), 120)
storage.put_content(['local_us'], get_layer_path(blob), content)
return blob, digest
@pytest.mark.parametrize('schema_version', [
1,
2,
])
def test_get_or_create_manifest(schema_version, initialized_db):
repository = create_repository('devtable', 'newrepo', None)
expected_labels = {
'Foo': 'Bar',
'Baz': 'Meh',
}
layer_json = json.dumps({
'id': 'somelegacyid',
'config': {
'Labels': expected_labels,
},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
],
})
# Create a legacy image.
find_create_or_link_image('somelegacyid', repository, 'devtable', {}, 'local_us')
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
# Add a blob of random data.
random_data = 'hello world'
_, random_digest = _populate_blob(random_data)
# Build the manifest.
if schema_version == 1:
builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
builder.add_layer(random_digest, layer_json)
sample_manifest_instance = builder.build(docker_v2_signing_key)
elif schema_version == 2:
builder = DockerSchema2ManifestBuilder()
builder.set_config_digest(config_digest, len(layer_json))
builder.add_layer(random_digest, len(random_data))
sample_manifest_instance = builder.build()
# Create a new manifest.
created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage)
created = created_manifest.manifest
newly_created = created_manifest.newly_created
assert newly_created
assert created is not None
assert created.media_type.name == sample_manifest_instance.media_type
assert created.digest == sample_manifest_instance.digest
assert created.manifest_bytes == sample_manifest_instance.bytes.as_encoded_str()
assert created_manifest.labels_to_apply == expected_labels
# Verify it has a temporary tag pointing to it.
assert Tag.get(manifest=created, hidden=True).lifetime_end_ms
# Verify the legacy image.
legacy_image = get_legacy_image_for_manifest(created)
assert legacy_image is not None
assert legacy_image.storage.content_checksum == random_digest
# Verify the linked blobs.
blob_digests = [mb.blob.content_checksum for mb
in ManifestBlob.select().where(ManifestBlob.manifest == created)]
assert random_digest in blob_digests
if schema_version == 2:
assert config_digest in blob_digests
# Retrieve it again and ensure it is the same manifest.
created_manifest2 = get_or_create_manifest(repository, sample_manifest_instance, storage)
created2 = created_manifest2.manifest
newly_created2 = created_manifest2.newly_created
assert not newly_created2
assert created2 == created
# Ensure it again has a temporary tag.
assert Tag.get(manifest=created2, hidden=True).lifetime_end_ms
# Ensure the labels were added.
labels = list(list_manifest_labels(created))
assert len(labels) == 2
labels_dict = {label.key: label.value for label in labels}
assert labels_dict == expected_labels
def test_get_or_create_manifest_invalid_image(initialized_db):
repository = get_repository('devtable', 'simple')
latest_tag = get_tag(repository, 'latest')
parsed = DockerSchema1Manifest(Bytes.for_string_or_unicode(latest_tag.manifest.manifest_bytes),
validate=False)
builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
builder.add_layer(parsed.blob_digests[0], '{"id": "foo", "parent": "someinvalidimageid"}')
sample_manifest_instance = builder.build(docker_v2_signing_key)
created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage)
assert created_manifest is None
def test_get_or_create_manifest_list(initialized_db):
repository = create_repository('devtable', 'newrepo', None)
expected_labels = {
'Foo': 'Bar',
'Baz': 'Meh',
}
layer_json = json.dumps({
'id': 'somelegacyid',
'config': {
'Labels': expected_labels,
},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
],
})
# Create a legacy image.
find_create_or_link_image('somelegacyid', repository, 'devtable', {}, 'local_us')
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
# Add a blob of random data.
random_data = 'hello world'
_, random_digest = _populate_blob(random_data)
# Build the manifests.
v1_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
v1_builder.add_layer(random_digest, layer_json)
v1_manifest = v1_builder.build(docker_v2_signing_key).unsigned()
v2_builder = DockerSchema2ManifestBuilder()
v2_builder.set_config_digest(config_digest, len(layer_json))
v2_builder.add_layer(random_digest, len(random_data))
v2_manifest = v2_builder.build()
# Write the manifests.
v1_created = get_or_create_manifest(repository, v1_manifest, storage)
assert v1_created
assert v1_created.manifest.digest == v1_manifest.digest
v2_created = get_or_create_manifest(repository, v2_manifest, storage)
assert v2_created
assert v2_created.manifest.digest == v2_manifest.digest
# Build the manifest list.
list_builder = DockerSchema2ManifestListBuilder()
list_builder.add_manifest(v1_manifest, 'amd64', 'linux')
list_builder.add_manifest(v2_manifest, 'amd32', 'linux')
manifest_list = list_builder.build()
# Write the manifest list, which should also write the manifests themselves.
created_tuple = get_or_create_manifest(repository, manifest_list, storage)
assert created_tuple is not None
created_list = created_tuple.manifest
assert created_list
assert created_list.media_type.name == manifest_list.media_type
assert created_list.digest == manifest_list.digest
# Ensure the child manifest links exist.
child_manifests = {cm.child_manifest.digest: cm.child_manifest
for cm in ManifestChild.select().where(ManifestChild.manifest == created_list)}
assert len(child_manifests) == 2
assert v1_manifest.digest in child_manifests
assert v2_manifest.digest in child_manifests
assert child_manifests[v1_manifest.digest].media_type.name == v1_manifest.media_type
assert child_manifests[v2_manifest.digest].media_type.name == v2_manifest.media_type
def test_get_or_create_manifest_list_duplicate_child_manifest(initialized_db):
repository = create_repository('devtable', 'newrepo', None)
expected_labels = {
'Foo': 'Bar',
'Baz': 'Meh',
}
layer_json = json.dumps({
'id': 'somelegacyid',
'config': {
'Labels': expected_labels,
},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
],
})
# Create a legacy image.
find_create_or_link_image('somelegacyid', repository, 'devtable', {}, 'local_us')
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
# Add a blob of random data.
random_data = 'hello world'
_, random_digest = _populate_blob(random_data)
# Build the manifest.
v2_builder = DockerSchema2ManifestBuilder()
v2_builder.set_config_digest(config_digest, len(layer_json))
v2_builder.add_layer(random_digest, len(random_data))
v2_manifest = v2_builder.build()
# Write the manifest.
v2_created = get_or_create_manifest(repository, v2_manifest, storage)
assert v2_created
assert v2_created.manifest.digest == v2_manifest.digest
# Build the manifest list, with the child manifest repeated.
list_builder = DockerSchema2ManifestListBuilder()
list_builder.add_manifest(v2_manifest, 'amd64', 'linux')
list_builder.add_manifest(v2_manifest, 'amd32', 'linux')
manifest_list = list_builder.build()
# Write the manifest list, which should also write the manifests themselves.
created_tuple = get_or_create_manifest(repository, manifest_list, storage)
assert created_tuple is not None
created_list = created_tuple.manifest
assert created_list
assert created_list.media_type.name == manifest_list.media_type
assert created_list.digest == manifest_list.digest
# Ensure the child manifest links exist.
child_manifests = {cm.child_manifest.digest: cm.child_manifest
for cm in ManifestChild.select().where(ManifestChild.manifest == created_list)}
assert len(child_manifests) == 1
assert v2_manifest.digest in child_manifests
assert child_manifests[v2_manifest.digest].media_type.name == v2_manifest.media_type
# Try to create again and ensure we get back the same manifest list.
created2_tuple = get_or_create_manifest(repository, manifest_list, storage)
assert created2_tuple is not None
assert created2_tuple.manifest == created_list
def test_get_or_create_manifest_with_remote_layers(initialized_db):
repository = create_repository('devtable', 'newrepo', None)
layer_json = json.dumps({
'config': {},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
],
})
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
# Add a blob of random data.
random_data = 'hello world'
_, random_digest = _populate_blob(random_data)
remote_digest = sha256_digest('something')
builder = DockerSchema2ManifestBuilder()
builder.set_config_digest(config_digest, len(layer_json))
builder.add_layer(remote_digest, 1234, urls=['http://hello/world'])
builder.add_layer(random_digest, len(random_data))
manifest = builder.build()
assert remote_digest in manifest.blob_digests
assert remote_digest not in manifest.local_blob_digests
assert manifest.has_remote_layer
assert not manifest.has_legacy_image
assert manifest.get_schema1_manifest('foo', 'bar', 'baz', None) is None
# Write the manifest.
created_tuple = get_or_create_manifest(repository, manifest, storage)
assert created_tuple is not None
created_manifest = created_tuple.manifest
assert created_manifest
assert created_manifest.media_type.name == manifest.media_type
assert created_manifest.digest == manifest.digest
# Verify the legacy image.
legacy_image = get_legacy_image_for_manifest(created_manifest)
assert legacy_image is None
# Verify the linked blobs.
blob_digests = {mb.blob.content_checksum for mb
in ManifestBlob.select().where(ManifestBlob.manifest == created_manifest)}
assert random_digest in blob_digests
assert config_digest in blob_digests
assert remote_digest not in blob_digests
def create_manifest_for_testing(repository, differentiation_field='1', include_shared_blob=False):
# Populate a manifest.
layer_json = json.dumps({
'config': {},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [],
})
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
remote_digest = sha256_digest('something')
builder = DockerSchema2ManifestBuilder()
builder.set_config_digest(config_digest, len(layer_json))
builder.add_layer(remote_digest, 1234, urls=['http://hello/world' + differentiation_field])
if include_shared_blob:
_, blob_digest = _populate_blob('some data here')
builder.add_layer(blob_digest, 4567)
manifest = builder.build()
created = get_or_create_manifest(repository, manifest, storage)
assert created
return created.manifest, manifest
def test_retriever(initialized_db):
repository = create_repository('devtable', 'newrepo', None)
layer_json = json.dumps({
'config': {},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
],
})
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
# Add a blob of random data.
random_data = 'hello world'
_, random_digest = _populate_blob(random_data)
# Add another blob of random data.
other_random_data = 'hi place'
_, other_random_digest = _populate_blob(other_random_data)
remote_digest = sha256_digest('something')
builder = DockerSchema2ManifestBuilder()
builder.set_config_digest(config_digest, len(layer_json))
builder.add_layer(other_random_digest, len(other_random_data))
builder.add_layer(random_digest, len(random_data))
manifest = builder.build()
assert config_digest in manifest.blob_digests
assert random_digest in manifest.blob_digests
assert other_random_digest in manifest.blob_digests
assert config_digest in manifest.local_blob_digests
assert random_digest in manifest.local_blob_digests
assert other_random_digest in manifest.local_blob_digests
# Write the manifest.
created_tuple = get_or_create_manifest(repository, manifest, storage)
assert created_tuple is not None
created_manifest = created_tuple.manifest
assert created_manifest
assert created_manifest.media_type.name == manifest.media_type
assert created_manifest.digest == manifest.digest
# Verify the linked blobs.
blob_digests = {mb.blob.content_checksum for mb
in ManifestBlob.select().where(ManifestBlob.manifest == created_manifest)}
assert random_digest in blob_digests
assert other_random_digest in blob_digests
assert config_digest in blob_digests
# Delete any Image rows linking to the blobs from temp tags.
for blob_digest in blob_digests:
storage_row = ImageStorage.get(content_checksum=blob_digest)
for image in list(Image.select().where(Image.storage == storage_row)):
all_temp = all([rt.hidden for rt
in RepositoryTag.select().where(RepositoryTag.image == image)])
if all_temp:
RepositoryTag.delete().where(RepositoryTag.image == image).execute()
image.delete_instance(recursive=True)
# Verify the blobs in the retriever.
retriever = RepositoryContentRetriever(repository, storage)
assert (retriever.get_manifest_bytes_with_digest(created_manifest.digest) ==
manifest.bytes.as_encoded_str())
for blob_digest in blob_digests:
assert retriever.get_blob_bytes_with_digest(blob_digest) is not None

data/model/oci/test/test_oci_tag.py Normal file
@@ -0,0 +1,378 @@
import pytest
from calendar import timegm
from datetime import timedelta, datetime
from playhouse.test_utils import assert_query_count
from data.database import (Tag, ManifestLegacyImage, TagToRepositoryTag, TagManifestToManifest,
TagManifest, Manifest, Repository)
from data.model.oci.test.test_oci_manifest import create_manifest_for_testing
from data.model.oci.tag import (find_matching_tag, get_most_recent_tag,
get_most_recent_tag_lifetime_start, list_alive_tags,
get_legacy_images_for_tags, filter_to_alive_tags,
filter_to_visible_tags, list_repository_tag_history,
get_expired_tag, get_tag, delete_tag,
delete_tags_for_manifest, change_tag_expiration,
set_tag_expiration_for_manifest, retarget_tag,
create_temporary_tag_if_necessary,
lookup_alive_tags_shallow,
lookup_unrecoverable_tags,
get_epoch_timestamp_ms)
from data.model.repository import get_repository, create_repository
from test.fixtures import *
@pytest.mark.parametrize('namespace_name, repo_name, tag_names, expected', [
('devtable', 'simple', ['latest'], 'latest'),
('devtable', 'simple', ['unknown', 'latest'], 'latest'),
('devtable', 'simple', ['unknown'], None),
])
def test_find_matching_tag(namespace_name, repo_name, tag_names, expected, initialized_db):
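  # find_matching_tag should return an alive tag whose name appears in tag_names, or None when
  # none of the given names match an alive tag.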
repo = get_repository(namespace_name, repo_name)
if expected is not None:
with assert_query_count(1):
found = find_matching_tag(repo, tag_names)
assert found is not None
assert found.name == expected
assert not found.lifetime_end_ms
else:
with assert_query_count(1):
assert find_matching_tag(repo, tag_names) is None
def test_get_most_recent_tag_lifetime_start(initialized_db):
repo = get_repository('devtable', 'simple')
tag = get_most_recent_tag(repo)
with assert_query_count(1):
tags = get_most_recent_tag_lifetime_start([repo])
assert tags[repo.id] == tag.lifetime_start_ms
def test_get_most_recent_tag(initialized_db):
repo = get_repository('outsideorg', 'coolrepo')
with assert_query_count(1):
assert get_most_recent_tag(repo).name == 'latest'
def test_get_most_recent_tag_empty_repo(initialized_db):
empty_repo = create_repository('devtable', 'empty', None)
with assert_query_count(1):
assert get_most_recent_tag(empty_repo) is None
def test_list_alive_tags(initialized_db):
found = False
for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())):
tags = list_alive_tags(tag.repository)
assert tag in tags
with assert_query_count(1):
legacy_images = get_legacy_images_for_tags(tags)
for tag in tags:
assert ManifestLegacyImage.get(manifest=tag.manifest).image == legacy_images[tag.id]
found = True
assert found
# Ensure hidden tags cannot be listed.
tag = Tag.get()
tag.hidden = True
tag.save()
tags = list_alive_tags(tag.repository)
assert tag not in tags
def test_lookup_alive_tags_shallow(initialized_db):
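  # Shallow variant of the alive-tag listing; hidden tags must be excluded here as well.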
found = False
for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())):
tags = lookup_alive_tags_shallow(tag.repository)
found = True
assert tag in tags
assert found
# Ensure hidden tags cannot be listed.
tag = Tag.get()
tag.hidden = True
tag.save()
tags = lookup_alive_tags_shallow(tag.repository)
assert tag not in tags
def test_get_tag(initialized_db):
found = False
for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())):
repo = tag.repository
with assert_query_count(1):
assert get_tag(repo, tag.name) == tag
found = True
assert found
@pytest.mark.parametrize('namespace_name, repo_name', [
('devtable', 'simple'),
('devtable', 'complex'),
])
def test_list_repository_tag_history(namespace_name, repo_name, initialized_db):
repo = get_repository(namespace_name, repo_name)
with assert_query_count(1):
results, has_more = list_repository_tag_history(repo, 1, 100)
assert results
assert not has_more
def test_list_repository_tag_history_with_history(initialized_db):
repo = get_repository('devtable', 'history')
with assert_query_count(1):
results, _ = list_repository_tag_history(repo, 1, 100)
assert len(results) == 2
assert results[0].lifetime_end_ms is None
assert results[1].lifetime_end_ms is not None
with assert_query_count(1):
results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest')
assert len(results) == 2
assert results[0].lifetime_end_ms is None
assert results[1].lifetime_end_ms is not None
with assert_query_count(1):
results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='foobar')
assert len(results) == 0
def test_list_repository_tag_history_all_tags(initialized_db):
for tag in Tag.select():
repo = tag.repository
with assert_query_count(1):
results, _ = list_repository_tag_history(repo, 1, 1000)
assert (tag in results) == (not tag.hidden)
@pytest.mark.parametrize('namespace_name, repo_name, tag_name, expected', [
('devtable', 'simple', 'latest', False),
('devtable', 'simple', 'unknown', False),
('devtable', 'complex', 'latest', False),
('devtable', 'history', 'latest', True),
])
def test_get_expired_tag(namespace_name, repo_name, tag_name, expected, initialized_db):
repo = get_repository(namespace_name, repo_name)
with assert_query_count(1):
assert bool(get_expired_tag(repo, tag_name)) == expected
def test_delete_tag(initialized_db):
found = False
for tag in list(filter_to_visible_tags(filter_to_alive_tags(Tag.select()))):
repo = tag.repository
assert get_tag(repo, tag.name) == tag
assert tag.lifetime_end_ms is None
with assert_query_count(4):
assert delete_tag(repo, tag.name) == tag
assert get_tag(repo, tag.name) is None
found = True
assert found
def test_delete_tags_for_manifest(initialized_db):
for tag in list(filter_to_visible_tags(filter_to_alive_tags(Tag.select()))):
repo = tag.repository
assert get_tag(repo, tag.name) == tag
with assert_query_count(5):
assert delete_tags_for_manifest(tag.manifest) == [tag]
assert get_tag(repo, tag.name) is None
def test_delete_tags_for_manifest_same_manifest(initialized_db):
  new_repo = create_repository('devtable', 'newrepo', None)
manifest_1, _ = create_manifest_for_testing(new_repo, '1')
manifest_2, _ = create_manifest_for_testing(new_repo, '2')
assert manifest_1.digest != manifest_2.digest
# Add some tag history, moving a tag back and forth between two manifests.
retarget_tag('latest', manifest_1)
retarget_tag('latest', manifest_2)
retarget_tag('latest', manifest_1)
retarget_tag('latest', manifest_2)
retarget_tag('another1', manifest_1)
retarget_tag('another2', manifest_2)
# Delete all tags pointing to the first manifest.
delete_tags_for_manifest(manifest_1)
assert get_tag(new_repo, 'latest').manifest == manifest_2
assert get_tag(new_repo, 'another1') is None
assert get_tag(new_repo, 'another2').manifest == manifest_2
# Delete all tags pointing to the second manifest, which should actually delete the `latest`
# tag now.
delete_tags_for_manifest(manifest_2)
assert get_tag(new_repo, 'latest') is None
assert get_tag(new_repo, 'another1') is None
assert get_tag(new_repo, 'another2') is None
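# Per the parametrized cases below, change_tag_expiration is expected to clamp the requested
# expiration offset to a minimum of 1 hour and a maximum of 104 weeks (the exact bounds may be
# configurable).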
@pytest.mark.parametrize('timedelta, expected_timedelta', [
pytest.param(timedelta(seconds=1), timedelta(hours=1), id='less than minimum'),
  pytest.param(timedelta(weeks=300), timedelta(weeks=104), id='more than maximum'),
pytest.param(timedelta(weeks=1), timedelta(weeks=1), id='within range'),
])
def test_change_tag_expiration(timedelta, expected_timedelta, initialized_db):
now = datetime.utcnow()
now_ms = timegm(now.utctimetuple()) * 1000
tag = Tag.get()
tag.lifetime_start_ms = now_ms
tag.save()
original_end_ms, okay = change_tag_expiration(tag, now + timedelta)
assert okay
assert original_end_ms == tag.lifetime_end_ms
updated_tag = Tag.get(id=tag.id)
offset = expected_timedelta.total_seconds() * 1000
expected_ms = (updated_tag.lifetime_start_ms + offset)
assert updated_tag.lifetime_end_ms == expected_ms
original_end_ms, okay = change_tag_expiration(tag, None)
assert okay
assert original_end_ms == expected_ms
updated_tag = Tag.get(id=tag.id)
assert updated_tag.lifetime_end_ms is None
def test_set_tag_expiration_for_manifest(initialized_db):
tag = Tag.get()
manifest = tag.manifest
assert manifest is not None
set_tag_expiration_for_manifest(manifest, datetime.utcnow() + timedelta(weeks=1))
updated_tag = Tag.get(id=tag.id)
assert updated_tag.lifetime_end_ms is not None
def test_create_temporary_tag_if_necessary(initialized_db):
tag = Tag.get()
manifest = tag.manifest
assert manifest is not None
# Ensure no tag is created, since an existing one is present.
created = create_temporary_tag_if_necessary(manifest, 60)
assert created is None
# Mark the tag as deleted.
tag.lifetime_end_ms = 1
tag.save()
# Now create a temp tag.
created = create_temporary_tag_if_necessary(manifest, 60)
assert created is not None
assert created.hidden
assert created.name.startswith('$temp-')
assert created.manifest == manifest
assert created.lifetime_end_ms is not None
assert created.lifetime_end_ms == (created.lifetime_start_ms + 60000)
# Try again and ensure it is not created.
created = create_temporary_tag_if_necessary(manifest, 30)
assert created is None
def test_retarget_tag(initialized_db):
repo = get_repository('devtable', 'history')
results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest')
assert len(results) == 2
assert results[0].lifetime_end_ms is None
assert results[1].lifetime_end_ms is not None
# Revert back to the original manifest.
created = retarget_tag('latest', results[0].manifest, is_reversion=True,
now_ms=results[1].lifetime_end_ms + 10000)
assert created.lifetime_end_ms is None
assert created.reversion
assert created.name == 'latest'
assert created.manifest == results[0].manifest
# Verify in the history.
results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest')
assert len(results) == 3
assert results[0].lifetime_end_ms is None
assert results[1].lifetime_end_ms is not None
assert results[2].lifetime_end_ms is not None
assert results[0] == created
# Verify old-style tables.
repository_tag = TagToRepositoryTag.get(tag=created).repository_tag
assert repository_tag.lifetime_start_ts == int(created.lifetime_start_ms / 1000)
tag_manifest = TagManifest.get(tag=repository_tag)
assert TagManifestToManifest.get(tag_manifest=tag_manifest).manifest == created.manifest
def test_retarget_tag_wrong_name(initialized_db):
repo = get_repository('devtable', 'history')
results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest')
assert len(results) == 2
created = retarget_tag('someothername', results[1].manifest, is_reversion=True)
assert created is None
results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest')
assert len(results) == 2
def test_lookup_unrecoverable_tags(initialized_db):
# Ensure no existing tags are found.
for repo in Repository.select():
assert not list(lookup_unrecoverable_tags(repo))
# Mark a tag as outside the expiration window and ensure it is found.
repo = get_repository('devtable', 'history')
results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest')
assert len(results) == 2
results[1].lifetime_end_ms = 1
results[1].save()
# Ensure the tag is now found.
found = list(lookup_unrecoverable_tags(repo))
assert found
assert len(found) == 1
assert found[0] == results[1]
# Mark the tag as expiring in the future and ensure it is no longer found.
results[1].lifetime_end_ms = get_epoch_timestamp_ms() + 1000000
results[1].save()
found = list(lookup_unrecoverable_tags(repo))
assert not found