initial import for Open Source 🎉

Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions


@@ -0,0 +1,43 @@
import os
import logging

from data.registry_model.registry_pre_oci_model import pre_oci_model
from data.registry_model.registry_oci_model import oci_model
from data.registry_model.modelsplitter import SplitModel

logger = logging.getLogger(__name__)


class RegistryModelProxy(object):
  def __init__(self):
    self._model = oci_model if os.getenv('OCI_DATA_MODEL') == 'true' else pre_oci_model

  def setup_split(self, oci_model_proportion, oci_whitelist, v22_whitelist, upgrade_mode):
    if os.getenv('OCI_DATA_MODEL') == 'true':
      return

    if upgrade_mode == 'complete':
      logger.info('===============================')
      logger.info('Full V2_2 + OCI model is enabled')
      logger.info('===============================')
      self._model = oci_model
      return

    logger.info('===============================')
    logger.info('Split registry model: OCI %s proportion and whitelist `%s` and V22 whitelist `%s`',
                oci_model_proportion, oci_whitelist, v22_whitelist)
    logger.info('===============================')
    self._model = SplitModel(oci_model_proportion, oci_whitelist, v22_whitelist,
                             upgrade_mode == 'post-oci-rollout')

  def set_for_testing(self, use_oci_model):
    self._model = oci_model if use_oci_model else pre_oci_model
    logger.debug('Changed registry model to `%s` for testing', self._model)

  def __getattr__(self, attr):
    return getattr(self._model, attr)


registry_model = RegistryModelProxy()
logger.info('===============================')
logger.info('Using registry model `%s`', registry_model._model)
logger.info('===============================')
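In other words, callers never bind to a concrete model: they import the proxy and let __getattr__ forward every call. A minimal sketch of that delegation (the lookup_repository call is just a stand-in for any interface method):

# Hypothetical call site; any RegistryDataInterface method behaves the same.
from data.registry_model import registry_model

repo_ref = registry_model.lookup_repository('devtable', 'simple')
# The proxy's __getattr__ forwards to whichever model is active, so setting
# OCI_DATA_MODEL=true (or calling setup_split at startup) swaps the backing
# implementation without touching this call site.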


@@ -0,0 +1,335 @@
import logging
import time

from contextlib import contextmanager
from collections import namedtuple

import bitmath
import resumablehashlib

from data.registry_model import registry_model
from data.database import CloseForLongOperation, db_transaction
from digest import digest_tools
from util.registry.filelike import wrap_with_handler, StreamSlice
from util.registry.gzipstream import calculate_size_handler
from util.registry.torrent import PieceHasher

logger = logging.getLogger(__name__)

BLOB_CONTENT_TYPE = 'application/octet-stream'


class BlobUploadException(Exception):
  """ Base for all exceptions raised when uploading blobs. """

class BlobRangeMismatchException(BlobUploadException):
  """ Exception raised if the range to be uploaded does not match. """

class BlobDigestMismatchException(BlobUploadException):
  """ Exception raised if the digest requested does not match that of the contents uploaded. """

class BlobTooLargeException(BlobUploadException):
  """ Exception raised if the data uploaded exceeds the maximum_blob_size. """
  def __init__(self, uploaded, max_allowed):
    super(BlobTooLargeException, self).__init__()
    self.uploaded = uploaded
    self.max_allowed = max_allowed


BlobUploadSettings = namedtuple('BlobUploadSettings', ['maximum_blob_size',
                                                       'bittorrent_piece_size',
                                                       'committed_blob_expiration'])


def create_blob_upload(repository_ref, storage, settings, extra_blob_stream_handlers=None):
  """ Creates a new blob upload in the specified repository and returns a manager for interacting
      with that upload. Returns None if a new blob upload could not be started.
  """
  location_name = storage.preferred_locations[0]
  new_upload_uuid, upload_metadata = storage.initiate_chunked_upload(location_name)
  blob_upload = registry_model.create_blob_upload(repository_ref, new_upload_uuid, location_name,
                                                  upload_metadata)
  if blob_upload is None:
    return None

  return _BlobUploadManager(repository_ref, blob_upload, settings, storage,
                            extra_blob_stream_handlers)


def retrieve_blob_upload_manager(repository_ref, blob_upload_id, storage, settings):
  """ Retrieves the manager for an in-progress blob upload with the specified ID under the given
      repository or None if none.
  """
  blob_upload = registry_model.lookup_blob_upload(repository_ref, blob_upload_id)
  if blob_upload is None:
    return None

  return _BlobUploadManager(repository_ref, blob_upload, settings, storage)


@contextmanager
def complete_when_uploaded(blob_upload):
  """ Wraps the given blob upload in a context manager that completes the upload when the context
      closes.
  """
  try:
    yield blob_upload
  except Exception as ex:
    logger.exception('Exception when uploading blob `%s`', blob_upload.blob_upload_id)
    raise ex
  finally:
    # Cancel the upload if something went wrong or it was not committed to a blob.
    if blob_upload.committed_blob is None:
      blob_upload.cancel_upload()


@contextmanager
def upload_blob(repository_ref, storage, settings, extra_blob_stream_handlers=None):
  """ Starts a new blob upload in the specified repository and yields a manager for interacting
      with that upload. When the context manager completes, the blob upload is deleted, whether
      committed to a blob or not. Yields None if a blob upload could not be started.
  """
  created = create_blob_upload(repository_ref, storage, settings, extra_blob_stream_handlers)
  if not created:
    yield None
    return

  try:
    yield created
  except Exception as ex:
    logger.exception('Exception when uploading blob `%s`', created.blob_upload_id)
    raise ex
  finally:
    # Cancel the upload if something went wrong or it was not committed to a blob.
    if created.committed_blob is None:
      created.cancel_upload()
class _BlobUploadManager(object):
  """ Defines a helper class for easily interacting with blob uploads in progress, including
      handling of database and storage calls.
  """
  def __init__(self, repository_ref, blob_upload, settings, storage,
               extra_blob_stream_handlers=None):
    assert repository_ref is not None
    assert blob_upload is not None

    self.repository_ref = repository_ref
    self.blob_upload = blob_upload
    self.settings = settings
    self.storage = storage
    self.extra_blob_stream_handlers = extra_blob_stream_handlers
    self.committed_blob = None

  @property
  def blob_upload_id(self):
    """ Returns the unique ID for the blob upload. """
    return self.blob_upload.upload_id

  def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1, metric_queue=None):
    """ Uploads a chunk of data found in the given input file-like interface. start_offset and
        length are optional and should match a range header if any was given.

        If metric_queue is given, the upload time and chunk size are written into the metrics in
        the queue.

        Returns the total number of bytes uploaded after this upload has completed. Raises
        a BlobUploadException if the upload failed.
    """
    assert start_offset is not None
    assert length is not None

    if start_offset > 0 and start_offset > self.blob_upload.byte_count:
      logger.error('start_offset provided greater than blob_upload.byte_count')
      raise BlobRangeMismatchException()

    # Ensure that we won't go over the allowed maximum size for blobs.
    max_blob_size = bitmath.parse_string_unsafe(self.settings.maximum_blob_size)
    uploaded = bitmath.Byte(length + start_offset)
    if length > -1 and uploaded > max_blob_size:
      raise BlobTooLargeException(uploaded=uploaded.bytes, max_allowed=max_blob_size.bytes)

    location_set = {self.blob_upload.location_name}
    upload_error = None
    with CloseForLongOperation(app_config):
      if start_offset > 0 and start_offset < self.blob_upload.byte_count:
        # Skip the bytes which were received on a previous push, which are already stored and
        # included in the sha calculation.
        overlap_size = self.blob_upload.byte_count - start_offset
        input_fp = StreamSlice(input_fp, overlap_size)

        # Update our upload bounds to reflect the skipped portion of the overlap.
        start_offset = self.blob_upload.byte_count
        length = max(length - overlap_size, 0)

      # We use this to escape early in case we have already processed all of the bytes the user
      # wants to upload.
      if length == 0:
        return self.blob_upload.byte_count

      input_fp = wrap_with_handler(input_fp, self.blob_upload.sha_state.update)

      if self.extra_blob_stream_handlers:
        for handler in self.extra_blob_stream_handlers:
          input_fp = wrap_with_handler(input_fp, handler)

      # Add a hasher for calculating SHA1s for torrents if this is the first chunk and/or we have
      # already calculated hash data for the previous chunk(s).
      piece_hasher = None
      if self.blob_upload.chunk_count == 0 or self.blob_upload.piece_sha_state:
        initial_sha1_value = self.blob_upload.piece_sha_state or resumablehashlib.sha1()
        initial_sha1_pieces_value = self.blob_upload.piece_hashes or ''

        piece_hasher = PieceHasher(self.settings.bittorrent_piece_size, start_offset,
                                   initial_sha1_pieces_value, initial_sha1_value)
        input_fp = wrap_with_handler(input_fp, piece_hasher.update)

      # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip
      # the stream so we can determine the uncompressed size. We'll throw out this data if
      # another chunk comes in, but in the common case the docker client only sends one chunk.
      size_info = None
      if start_offset == 0 and self.blob_upload.chunk_count == 0:
        size_info, fn = calculate_size_handler()
        input_fp = wrap_with_handler(input_fp, fn)

      start_time = time.time()
      length_written, new_metadata, upload_error = self.storage.stream_upload_chunk(
        location_set,
        self.blob_upload.upload_id,
        start_offset,
        length,
        input_fp,
        self.blob_upload.storage_metadata,
        content_type=BLOB_CONTENT_TYPE,
      )

      if upload_error is not None:
        logger.error('storage.stream_upload_chunk returned error %s', upload_error)
        raise BlobUploadException(upload_error)

      # Update the chunk upload time and push bytes metrics.
      if metric_queue is not None:
        metric_queue.chunk_upload_time.Observe(time.time() - start_time, labelvalues=[
          length_written, list(location_set)[0]])
        metric_queue.push_byte_count.Inc(length_written)

    # Ensure we have not gone beyond the max layer size.
    new_blob_bytes = self.blob_upload.byte_count + length_written
    new_blob_size = bitmath.Byte(new_blob_bytes)
    if new_blob_size > max_blob_size:
      raise BlobTooLargeException(uploaded=new_blob_size, max_allowed=max_blob_size.bytes)

    # If we determined an uncompressed size and this is the first chunk, add it to the blob.
    # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
    uncompressed_byte_count = self.blob_upload.uncompressed_byte_count
    if size_info is not None and self.blob_upload.chunk_count == 0 and size_info.is_valid:
      uncompressed_byte_count = size_info.uncompressed_size
    elif length_written > 0:
      # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
      # know the uncompressed size.
      uncompressed_byte_count = None

    piece_hashes = None
    piece_sha_state = None
    if piece_hasher is not None:
      piece_hashes = piece_hasher.piece_hashes
      piece_sha_state = piece_hasher.hash_fragment

    self.blob_upload = registry_model.update_blob_upload(self.blob_upload,
                                                         uncompressed_byte_count,
                                                         piece_hashes,
                                                         piece_sha_state,
                                                         new_metadata,
                                                         new_blob_bytes,
                                                         self.blob_upload.chunk_count + 1,
                                                         self.blob_upload.sha_state)
    if self.blob_upload is None:
      raise BlobUploadException('Could not complete upload of chunk')

    return new_blob_bytes
  def cancel_upload(self):
    """ Cancels the blob upload, deleting any data uploaded and removing the upload itself. """
    if self.blob_upload is None:
      return

    # Tell storage to cancel the chunked upload, deleting its contents.
    self.storage.cancel_chunked_upload({self.blob_upload.location_name},
                                       self.blob_upload.upload_id,
                                       self.blob_upload.storage_metadata)

    # Remove the blob upload record itself.
    registry_model.delete_blob_upload(self.blob_upload)

  def commit_to_blob(self, app_config, expected_digest=None):
    """ Commits the blob upload to a blob under the repository. The resulting blob will be marked
        to not be GCed for some period of time (as configured by `committed_blob_expiration`).

        If expected_digest is specified, the content digest of the data uploaded for the blob is
        compared to that given and, if it does not match, a BlobDigestMismatchException is
        raised. The digest given must be of type `Digest` and not a string.
    """
    # Compare the content digest.
    if expected_digest is not None:
      self._validate_digest(expected_digest)

    # Finalize the storage.
    storage_already_existed = self._finalize_blob_storage(app_config)

    # Convert the upload to a blob.
    computed_digest_str = digest_tools.sha256_digest_from_hashlib(self.blob_upload.sha_state)

    with db_transaction():
      blob = registry_model.commit_blob_upload(self.blob_upload, computed_digest_str,
                                               self.settings.committed_blob_expiration)
      if blob is None:
        return None

      # Save torrent hash information (if available).
      if self.blob_upload.piece_sha_state is not None and not storage_already_existed:
        piece_bytes = self.blob_upload.piece_hashes + self.blob_upload.piece_sha_state.digest()
        registry_model.set_torrent_info(blob, self.settings.bittorrent_piece_size, piece_bytes)

    self.committed_blob = blob
    return blob

  def _validate_digest(self, expected_digest):
    """ Verifies that the digest's SHA matches that of the uploaded data. """
    computed_digest = digest_tools.sha256_digest_from_hashlib(self.blob_upload.sha_state)
    if not digest_tools.digests_equal(computed_digest, expected_digest):
      logger.error('Digest mismatch for upload %s: Expected digest %s, found digest %s',
                   self.blob_upload.upload_id, expected_digest, computed_digest)
      raise BlobDigestMismatchException()

  def _finalize_blob_storage(self, app_config):
    """ When an upload is successful, this ends the uploading process from the
        storage's perspective. Returns True if the blob already existed.
    """
    computed_digest = digest_tools.sha256_digest_from_hashlib(self.blob_upload.sha_state)
    final_blob_location = digest_tools.content_path(computed_digest)

    # Close the database connection before we perform this operation, as it can take a while
    # and we shouldn't hold the connection during that time.
    with CloseForLongOperation(app_config):
      # Move the storage into place, or if this was a re-upload, cancel it.
      already_existed = self.storage.exists({self.blob_upload.location_name}, final_blob_location)
      if already_existed:
        # It already existed, clean up our upload which served as proof that the
        # uploader had the blob.
        self.storage.cancel_chunked_upload({self.blob_upload.location_name},
                                           self.blob_upload.upload_id,
                                           self.blob_upload.storage_metadata)
      else:
        # We were the first ones to upload this image (at least to this location).
        # Let's copy it into place.
        self.storage.complete_chunked_upload({self.blob_upload.location_name},
                                             self.blob_upload.upload_id,
                                             final_blob_location,
                                             self.blob_upload.storage_metadata)

    return already_existed
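Taken together, the intended call pattern is the upload_blob context manager plus commit_to_blob. A rough usage sketch, assuming repo_ref, storage, app_config and input_fp come from the surrounding application (the settings values here are illustrative, not defaults):

settings = BlobUploadSettings(maximum_blob_size='5G',
                              bittorrent_piece_size=512 * 1024,
                              committed_blob_expiration=300)
with upload_blob(repo_ref, storage, settings) as manager:
  if manager is not None:
    manager.upload_chunk(app_config, input_fp)
    blob = manager.commit_to_blob(app_config)
# If commit_to_blob never ran (an error, or the client abandoned the push),
# the finally block above cancels the upload so no orphaned chunks remain.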


@@ -0,0 +1,86 @@
# pylint: disable=protected-access

from functools import wraps, total_ordering


class FromDictionaryException(Exception):
  """ Exception raised if constructing a data type from a dictionary fails due to
      missing data.
  """


def datatype(name, static_fields):
  """ Defines a base class for a datatype that will represent a row from the database,
      in an abstracted form.
  """
  @total_ordering
  class DataType(object):
    __name__ = name

    def __init__(self, **kwargs):
      self._db_id = kwargs.pop('db_id', None)
      self._inputs = kwargs.pop('inputs', None)
      self._fields = kwargs

      for name in static_fields:
        assert name in self._fields, 'Missing field %s' % name

    def __eq__(self, other):
      return self._db_id == other._db_id

    def __lt__(self, other):
      return self._db_id < other._db_id

    def __getattr__(self, name):
      if name in static_fields:
        return self._fields[name]

      raise AttributeError('Unknown field `%s`' % name)

    def __repr__(self):
      return '<%s> #%s' % (name, self._db_id)

    @classmethod
    def from_dict(cls, dict_data):
      try:
        return cls(**dict_data)
      except Exception:
        raise FromDictionaryException()

    def asdict(self):
      dictionary_rep = dict(self._fields)
      assert ('db_id' not in dictionary_rep and
              'inputs' not in dictionary_rep)

      dictionary_rep['db_id'] = self._db_id
      dictionary_rep['inputs'] = self._inputs
      return dictionary_rep

  return DataType


def requiresinput(input_name):
  """ Marks a property on the data type as requiring an input to be invoked. """
  def inner(func):
    @wraps(func)
    def wrapper(self, *args, **kwargs):
      if self._inputs.get(input_name) is None:
        raise Exception('Cannot invoke function with missing input `%s`' % input_name)

      kwargs[input_name] = self._inputs[input_name]
      result = func(self, *args, **kwargs)
      return result
    return wrapper
  return inner


def optionalinput(input_name):
  """ Marks a property on the data type as having an input be optional when invoked. """
  def inner(func):
    @wraps(func)
    def wrapper(self, *args, **kwargs):
      kwargs[input_name] = self._inputs.get(input_name)
      result = func(self, *args, **kwargs)
      return result
    return wrapper
  return inner
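A toy example may make the contract clearer; the Widget type and its owner input are invented purely for illustration:

class Widget(datatype('Widget', ['name'])):
  @property
  @optionalinput('owner')
  def owner(self, owner):
    return owner or '(unowned)'

w = Widget(db_id=1, inputs=dict(owner=None), name='gear')
assert w.name == 'gear'        # static field, served from _fields
assert w.owner == '(unowned)'  # optional input, absent here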


@@ -0,0 +1,504 @@
import hashlib

from collections import namedtuple
from enum import Enum, unique

from cachetools.func import lru_cache

from data import model
from data.database import Manifest as ManifestTable
from data.registry_model.datatype import datatype, requiresinput, optionalinput
from image.docker import ManifestException
from image.docker.schemas import parse_manifest_from_bytes
from image.docker.schema1 import DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
from image.docker.schema2 import DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
from util.bytes import Bytes


class RepositoryReference(datatype('Repository', [])):
  """ RepositoryReference is a reference to a repository, passed to registry interface methods. """
  @classmethod
  def for_repo_obj(cls, repo_obj, namespace_name=None, repo_name=None, is_free_namespace=None,
                   state=None):
    if repo_obj is None:
      return None

    return RepositoryReference(db_id=repo_obj.id,
                               inputs=dict(
                                 kind=model.repository.get_repo_kind_name(repo_obj),
                                 is_public=model.repository.is_repository_public(repo_obj),
                                 namespace_name=namespace_name,
                                 repo_name=repo_name,
                                 is_free_namespace=is_free_namespace,
                                 state=state
                               ))

  @classmethod
  def for_id(cls, repo_id, namespace_name=None, repo_name=None, is_free_namespace=None,
             state=None):
    return RepositoryReference(db_id=repo_id,
                               inputs=dict(
                                 kind=None,
                                 is_public=None,
                                 namespace_name=namespace_name,
                                 repo_name=repo_name,
                                 is_free_namespace=is_free_namespace,
                                 state=state
                               ))
  @property
  @lru_cache(maxsize=1)
  def _repository_obj(self):
    return model.repository.lookup_repository(self._db_id)

  @property
  @optionalinput('kind')
  def kind(self, kind):
    """ Returns the kind of the repository. """
    return kind or model.repository.get_repo_kind_name(self._repository_obj)

  @property
  @optionalinput('is_public')
  def is_public(self, is_public):
    """ Returns whether the repository is public. """
    if is_public is not None:
      return is_public

    return model.repository.is_repository_public(self._repository_obj)
  @property
  def trust_enabled(self):
    """ Returns whether trust is enabled in this repository. """
    repository = self._repository_obj
    if repository is None:
      return None

    return repository.trust_enabled

  @property
  def id(self):
    """ Returns the database ID of the repository. """
    return self._db_id

  @property
  @optionalinput('namespace_name')
  def namespace_name(self, namespace_name=None):
    """ Returns the namespace name of this repository. """
    if namespace_name is not None:
      return namespace_name

    repository = self._repository_obj
    if repository is None:
      return None

    return repository.namespace_user.username

  @property
  @optionalinput('is_free_namespace')
  def is_free_namespace(self, is_free_namespace=None):
    """ Returns whether the namespace of the repository is on a free plan. """
    if is_free_namespace is not None:
      return is_free_namespace

    repository = self._repository_obj
    if repository is None:
      return None

    return repository.namespace_user.stripe_id is None

  @property
  @optionalinput('repo_name')
  def name(self, repo_name=None):
    """ Returns the name of this repository. """
    if repo_name is not None:
      return repo_name

    repository = self._repository_obj
    if repository is None:
      return None

    return repository.name

  @property
  @optionalinput('state')
  def state(self, state=None):
    """ Return the state of the Repository. """
    if state is not None:
      return state

    repository = self._repository_obj
    if repository is None:
      return None

    return repository.state
class Label(datatype('Label', ['key', 'value', 'uuid', 'source_type_name', 'media_type_name'])):
  """ Label represents a label on a manifest. """
  @classmethod
  def for_label(cls, label):
    if label is None:
      return None

    return Label(db_id=label.id, key=label.key, value=label.value,
                 uuid=label.uuid, media_type_name=label.media_type.name,
                 source_type_name=label.source_type.name)


class ShallowTag(datatype('ShallowTag', ['name'])):
  """ ShallowTag represents a tag in a repository, but only contains basic information. """
  @classmethod
  def for_tag(cls, tag):
    if tag is None:
      return None

    return ShallowTag(db_id=tag.id, name=tag.name)

  @classmethod
  def for_repository_tag(cls, repository_tag):
    if repository_tag is None:
      return None

    return ShallowTag(db_id=repository_tag.id, name=repository_tag.name)

  @property
  def id(self):
    """ The ID of this tag for pagination purposes only. """
    return self._db_id


class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_start_ts',
                           'lifetime_end_ts', 'lifetime_start_ms', 'lifetime_end_ms'])):
  """ Tag represents a tag in a repository, which points to a manifest or image. """
  @classmethod
  def for_tag(cls, tag, legacy_image=None):
    if tag is None:
      return None

    return Tag(db_id=tag.id,
               name=tag.name,
               reversion=tag.reversion,
               lifetime_start_ms=tag.lifetime_start_ms,
               lifetime_end_ms=tag.lifetime_end_ms,
               lifetime_start_ts=tag.lifetime_start_ms / 1000,
               lifetime_end_ts=tag.lifetime_end_ms / 1000 if tag.lifetime_end_ms else None,
               manifest_digest=tag.manifest.digest,
               inputs=dict(legacy_image=legacy_image,
                           manifest=tag.manifest,
                           repository=RepositoryReference.for_id(tag.repository_id)))

  @classmethod
  def for_repository_tag(cls, repository_tag, manifest_digest=None, legacy_image=None):
    if repository_tag is None:
      return None

    return Tag(db_id=repository_tag.id,
               name=repository_tag.name,
               reversion=repository_tag.reversion,
               lifetime_start_ts=repository_tag.lifetime_start_ts,
               lifetime_end_ts=repository_tag.lifetime_end_ts,
               lifetime_start_ms=repository_tag.lifetime_start_ts * 1000,
               lifetime_end_ms=(repository_tag.lifetime_end_ts * 1000
                                if repository_tag.lifetime_end_ts else None),
               manifest_digest=manifest_digest,
               inputs=dict(legacy_image=legacy_image,
                           repository=RepositoryReference.for_id(repository_tag.repository_id)))

  @property
  @requiresinput('manifest')
  def _manifest(self, manifest):
    """ Returns the manifest for this tag. Will only apply to new-style OCI tags. """
    return manifest

  @property
  @optionalinput('manifest')
  def manifest(self, manifest):
    """ Returns the manifest for this tag or None if none. Will only apply to new-style OCI tags.
    """
    return Manifest.for_manifest(manifest, self.legacy_image_if_present)

  @property
  @requiresinput('repository')
  def repository(self, repository):
    """ Returns the repository under which this tag lives. """
    return repository

  @property
  @requiresinput('legacy_image')
  def legacy_image(self, legacy_image):
    """ Returns the legacy Docker V1-style image for this tag. Note that this
        will be None for tags whose manifests point to other manifests instead of images.
    """
    return legacy_image

  @property
  @optionalinput('legacy_image')
  def legacy_image_if_present(self, legacy_image):
    """ Returns the legacy Docker V1-style image for this tag. Note that this
        will be None for tags whose manifests point to other manifests instead of images.
    """
    return legacy_image

  @property
  def id(self):
    """ The ID of this tag for pagination purposes only. """
    return self._db_id
class Manifest(datatype('Manifest', ['digest', 'media_type', 'internal_manifest_bytes'])):
  """ Manifest represents a manifest in a repository. """
  @classmethod
  def for_tag_manifest(cls, tag_manifest, legacy_image=None):
    if tag_manifest is None:
      return None

    return Manifest(db_id=tag_manifest.id, digest=tag_manifest.digest,
                    internal_manifest_bytes=Bytes.for_string_or_unicode(tag_manifest.json_data),
                    media_type=DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE,  # Always in legacy.
                    inputs=dict(legacy_image=legacy_image, tag_manifest=True))

  @classmethod
  def for_manifest(cls, manifest, legacy_image):
    if manifest is None:
      return None

    # NOTE: `manifest_bytes` will be None if not selected by certain join queries.
    manifest_bytes = (Bytes.for_string_or_unicode(manifest.manifest_bytes)
                      if manifest.manifest_bytes is not None else None)
    return Manifest(db_id=manifest.id,
                    digest=manifest.digest,
                    internal_manifest_bytes=manifest_bytes,
                    media_type=ManifestTable.media_type.get_name(manifest.media_type_id),
                    inputs=dict(legacy_image=legacy_image, tag_manifest=False))

  @property
  @requiresinput('tag_manifest')
  def _is_tag_manifest(self, tag_manifest):
    return tag_manifest

  @property
  @requiresinput('legacy_image')
  def legacy_image(self, legacy_image):
    """ Returns the legacy Docker V1-style image for this manifest. """
    return legacy_image

  @property
  @optionalinput('legacy_image')
  def legacy_image_if_present(self, legacy_image):
    """ Returns the legacy Docker V1-style image for this manifest. Note that this
        will be None for manifests that point to other manifests instead of images.
    """
    return legacy_image

  def get_parsed_manifest(self, validate=True):
    """ Returns the parsed manifest for this manifest. """
    assert self.internal_manifest_bytes
    return parse_manifest_from_bytes(self.internal_manifest_bytes, self.media_type,
                                     validate=validate)

  @property
  def layers_compressed_size(self):
    """ Returns the total compressed size of the layers in the manifest or None if this could not
        be computed.
    """
    try:
      return self.get_parsed_manifest().layers_compressed_size
    except ManifestException:
      return None

  @property
  def is_manifest_list(self):
    """ Returns True if this manifest points to a list (instead of an image). """
    return self.media_type == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE


class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'comment', 'command',
                                           'image_size', 'aggregate_size', 'uploading',
                                           'v1_metadata_string'])):
  """ LegacyImage represents a Docker V1-style image found in a repository. """
  @classmethod
  def for_image(cls, image, images_map=None, tags_map=None, blob=None):
    if image is None:
      return None

    return LegacyImage(db_id=image.id,
                       inputs=dict(images_map=images_map, tags_map=tags_map,
                                   ancestor_id_list=image.ancestor_id_list(),
                                   blob=blob),
                       docker_image_id=image.docker_image_id,
                       created=image.created,
                       comment=image.comment,
                       command=image.command,
                       v1_metadata_string=image.v1_json_metadata,
                       image_size=image.storage.image_size,
                       aggregate_size=image.aggregate_size,
                       uploading=image.storage.uploading)

  @property
  def id(self):
    """ Returns the database ID of the legacy image. """
    return self._db_id

  @property
  @requiresinput('images_map')
  @requiresinput('ancestor_id_list')
  def parents(self, images_map, ancestor_id_list):
    """ Returns the parent images for this image. Raises an exception if the parents have
        not been loaded before this property is invoked. Parents are returned starting at the
        leaf image.
    """
    return [LegacyImage.for_image(images_map[ancestor_id], images_map=images_map)
            for ancestor_id in reversed(ancestor_id_list)
            if images_map.get(ancestor_id)]

  @property
  @requiresinput('blob')
  def blob(self, blob):
    """ Returns the blob for this image. Raises an exception if the blob has
        not been loaded before this property is invoked.
    """
    return blob

  @property
  @requiresinput('tags_map')
  def tags(self, tags_map):
    """ Returns the tags pointing to this image. Raises an exception if the tags have
        not been loaded before this property is invoked.
    """
    tags = tags_map.get(self._db_id)
    if not tags:
      return []

    return [Tag.for_repository_tag(tag) for tag in tags]
@unique
class SecurityScanStatus(Enum):
  """ Security scan status enum """
  SCANNED = 'scanned'
  FAILED = 'failed'
  QUEUED = 'queued'
  UNSUPPORTED = 'unsupported'


class ManifestLayer(namedtuple('ManifestLayer', ['layer_info', 'blob'])):
  """ Represents a single layer in a manifest. The `layer_info` data will be manifest-type
      specific, but will have a few expected fields (such as `digest`). The `blob` represents
      the associated blob for this layer, optionally with placements. If the layer is a remote
      layer, the blob will be None.
  """
  def estimated_size(self, estimate_multiplier):
    """ Returns the estimated size of this layer. If the layer's blob has an uncompressed size,
        it is used. Otherwise, the compressed_size field in the layer is multiplied by the
        multiplier.
    """
    if self.blob.uncompressed_size:
      return self.blob.uncompressed_size

    return (self.layer_info.compressed_size or 0) * estimate_multiplier


class Blob(datatype('Blob', ['uuid', 'digest', 'compressed_size', 'uncompressed_size',
                             'uploading'])):
  """ Blob represents a content-addressable piece of storage. """
  @classmethod
  def for_image_storage(cls, image_storage, storage_path, placements=None):
    if image_storage is None:
      return None

    return Blob(db_id=image_storage.id,
                uuid=image_storage.uuid,
                inputs=dict(placements=placements, storage_path=storage_path),
                digest=image_storage.content_checksum,
                compressed_size=image_storage.image_size,
                uncompressed_size=image_storage.uncompressed_size,
                uploading=image_storage.uploading)

  @property
  @requiresinput('storage_path')
  def storage_path(self, storage_path):
    """ Returns the path of this blob in storage. """
    # TODO: change this to take in the storage engine?
    return storage_path

  @property
  @requiresinput('placements')
  def placements(self, placements):
    """ Returns all the storage placements at which the Blob can be found. """
    return placements


class DerivedImage(datatype('DerivedImage', ['verb', 'varying_metadata', 'blob'])):
  """ DerivedImage represents an image derived from a manifest via some form of verb. """
  @classmethod
  def for_derived_storage(cls, derived, verb, varying_metadata, blob):
    return DerivedImage(db_id=derived.id,
                        verb=verb,
                        varying_metadata=varying_metadata,
                        blob=blob)

  @property
  def unique_id(self):
    """ Returns a unique ID for this derived image. This call will consistently produce the same
        unique ID across calls in the same code base.
    """
    return hashlib.sha256('%s:%s' % (self.verb, self._db_id)).hexdigest()


class TorrentInfo(datatype('TorrentInfo', ['pieces', 'piece_length'])):
  """ TorrentInfo represents information to pull a blob via torrent. """
  @classmethod
  def for_torrent_info(cls, torrent_info):
    return TorrentInfo(db_id=torrent_info.id,
                       pieces=torrent_info.pieces,
                       piece_length=torrent_info.piece_length)


class BlobUpload(datatype('BlobUpload', ['upload_id', 'byte_count', 'uncompressed_byte_count',
                                         'chunk_count', 'sha_state', 'location_name',
                                         'storage_metadata', 'piece_sha_state', 'piece_hashes'])):
  """ BlobUpload represents information about an in-progress upload to create a blob. """
  @classmethod
  def for_upload(cls, blob_upload, location_name=None):
    return BlobUpload(db_id=blob_upload.id,
                      upload_id=blob_upload.uuid,
                      byte_count=blob_upload.byte_count,
                      uncompressed_byte_count=blob_upload.uncompressed_byte_count,
                      chunk_count=blob_upload.chunk_count,
                      sha_state=blob_upload.sha_state,
                      location_name=location_name or blob_upload.location.name,
                      storage_metadata=blob_upload.storage_metadata,
                      piece_sha_state=blob_upload.piece_sha_state,
                      piece_hashes=blob_upload.piece_hashes)


class LikelyVulnerableTag(datatype('LikelyVulnerableTag', ['layer_id', 'name'])):
  """ LikelyVulnerableTag represents a tag in a repository that is likely vulnerable to a notified
      vulnerability.
  """
  # TODO: Remove all of this once we're on the new security model exclusively.
  @classmethod
  def for_tag(cls, tag, repository, docker_image_id, storage_uuid):
    layer_id = '%s.%s' % (docker_image_id, storage_uuid)
    return LikelyVulnerableTag(db_id=tag.id,
                               name=tag.name,
                               layer_id=layer_id,
                               inputs=dict(repository=repository))

  @classmethod
  def for_repository_tag(cls, tag, repository):
    tag_layer_id = '%s.%s' % (tag.image.docker_image_id, tag.image.storage.uuid)
    return LikelyVulnerableTag(db_id=tag.id,
                               name=tag.name,
                               layer_id=tag_layer_id,
                               inputs=dict(repository=repository))

  @property
  @requiresinput('repository')
  def repository(self, repository):
    return RepositoryReference.for_repo_obj(repository)
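The practical effect of the inputs mechanism is that callers decide up front which related data is preloaded. A hedged illustration using RepositoryReference.for_id (the ID and names are made up):

repo_ref = RepositoryReference.for_id(42, namespace_name='devtable', repo_name='simple')
assert repo_ref.name == 'simple'              # answered from the preloaded input
assert repo_ref.namespace_name == 'devtable'  # likewise, no database hit
# repo_ref.kind was passed as None, so accessing it would fall through to a
# model.repository lookup via the lazily cached _repository_obj.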


@@ -0,0 +1,384 @@
from abc import ABCMeta, abstractmethod

from six import add_metaclass


@add_metaclass(ABCMeta)
class RegistryDataInterface(object):
  """ Interface for code to work with the registry data model. The registry data model consists
      of all tables that store registry-specific information, such as Manifests, Blobs, Images,
      and Labels.
  """

  @abstractmethod
  def supports_schema2(self, namespace_name):
    """ Returns whether the implementation of the data interface supports schema 2 format
        manifests. """

  @abstractmethod
  def get_tag_legacy_image_id(self, repository_ref, tag_name, storage):
    """ Returns the legacy image ID for the tag with legacy images in
        the repository. Returns None if none.
    """

  @abstractmethod
  def get_legacy_tags_map(self, repository_ref, storage):
    """ Returns a map from tag name to its legacy image ID, for all tags with legacy images in
        the repository. Note that this can be a *very* heavy operation.
    """

  @abstractmethod
  def find_matching_tag(self, repository_ref, tag_names):
    """ Finds an alive tag in the repository matching one of the given tag names and returns it
        or None if none.
    """

  @abstractmethod
  def get_most_recent_tag(self, repository_ref):
    """ Returns the most recently pushed alive tag in the repository, if any. If none, returns
        None.
    """
  @abstractmethod
  def lookup_repository(self, namespace_name, repo_name, kind_filter=None):
    """ Looks up and returns a reference to the repository with the given namespace and name,
        or None if none. """

  @abstractmethod
  def get_manifest_for_tag(self, tag, backfill_if_necessary=False, include_legacy_image=False):
    """ Returns the manifest associated with the given tag. """

  @abstractmethod
  def lookup_manifest_by_digest(self, repository_ref, manifest_digest, allow_dead=False,
                                include_legacy_image=False, require_available=False):
    """ Looks up the manifest with the given digest under the given repository and returns it
        or None if none. If allow_dead is True, manifests pointed to by dead tags will also
        be returned. If require_available is True, a temporary tag will be added onto the
        returned manifest (before it is returned) to ensure it is available until another
        tagging or manifest operation is taken.
    """

  @abstractmethod
  def create_manifest_and_retarget_tag(self, repository_ref, manifest_interface_instance, tag_name,
                                       storage, raise_on_error=False):
    """ Creates a manifest in a repository, adding all of the necessary data in the model.

        The `manifest_interface_instance` parameter must be an instance of the manifest
        interface as returned by the image/docker package.

        Note that all blobs referenced by the manifest must exist under the repository or this
        method will fail and return None.

        Returns a reference to the (created manifest, tag) or (None, None) on error.
    """

  @abstractmethod
  def get_legacy_images(self, repository_ref):
    """ Returns an iterator of all the LegacyImages defined in the matching repository. """

  @abstractmethod
  def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False,
                       include_blob=False):
    """ Returns the matching LegacyImage under the matching repository, if any. If none,
        returns None.
    """
  @abstractmethod
  def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None):
    """ Creates a label on the manifest with the given key and value.

        Can raise InvalidLabelKeyException or InvalidMediaTypeException depending
        on the validation errors.
    """

  @abstractmethod
  def batch_create_manifest_labels(self, manifest):
    """ Returns a context manager for batch creation of labels on a manifest.

        Can raise InvalidLabelKeyException or InvalidMediaTypeException depending
        on the validation errors.
    """

  @abstractmethod
  def list_manifest_labels(self, manifest, key_prefix=None):
    """ Returns all labels found on the manifest. If specified, the key_prefix will filter the
        labels returned to those keys that start with the given prefix.
    """

  @abstractmethod
  def get_manifest_label(self, manifest, label_uuid):
    """ Returns the label with the specified UUID on the manifest or None if none. """

  @abstractmethod
  def delete_manifest_label(self, manifest, label_uuid):
    """ Delete the label with the specified UUID on the manifest. Returns the label deleted
        or None if none.
    """

  @abstractmethod
  def lookup_cached_active_repository_tags(self, model_cache, repository_ref, start_pagination_id,
                                           limit):
    """ Returns a page of active tags in a repository. Note that the tags returned by this method
        are ShallowTag objects, which only contain the tag name. This method will automatically
        cache the result and check the cache before making a call.
    """

  @abstractmethod
  def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit):
    """ Returns a page of active tags in a repository. Note that the tags returned by this method
        are ShallowTag objects, which only contain the tag name.
    """

  @abstractmethod
  def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False):
    """ Returns a list of all the active tags in the repository. Note that this is a *HEAVY*
        operation on repositories with a lot of tags, and should only be used for testing or
        where other more specific operations are not possible.
    """

  @abstractmethod
  def list_repository_tag_history(self, repository_ref, page=1, size=100, specific_tag_name=None,
                                  active_tags_only=False, since_time_ms=None):
    """ Returns the history of all tags in the repository (unless filtered). This includes tags
        that have been made inactive due to newer versions of those tags coming into service.
    """

  @abstractmethod
  def get_most_recent_tag_lifetime_start(self, repository_refs):
    """ Returns a map from repository ID to the last modified time (in seconds from epoch, UTC)
        for each repository in the given repository reference list.
    """

  @abstractmethod
  def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False):
    """ Returns the latest, *active* tag found in the repository, with the matching name,
        or None if none.
    """
  @abstractmethod
  def has_expired_tag(self, repository_ref, tag_name):
    """ Returns true if and only if the repository contains a tag with the given name that is
        expired.
    """

  @abstractmethod
  def retarget_tag(self, repository_ref, tag_name, manifest_or_legacy_image,
                   storage, legacy_manifest_key, is_reversion=False):
    """ Creates, updates or moves a tag to a new entry in history, pointing to the manifest or
        legacy image specified. If is_reversion is set to True, this operation is considered a
        reversion over a previous tag move operation. Returns the updated Tag or None on error.
    """

  @abstractmethod
  def delete_tag(self, repository_ref, tag_name):
    """ Deletes the latest, *active* tag with the given name in the repository. """

  @abstractmethod
  def delete_tags_for_manifest(self, manifest):
    """ Deletes all tags pointing to the given manifest, making the manifest inaccessible for
        pulling. Returns the tags deleted, if any. Returns None on error.
    """

  @abstractmethod
  def change_repository_tag_expiration(self, tag, expiration_date):
    """ Sets the expiration date of the tag under the matching repository to that given. If the
        expiration date is None, then the tag will not expire. Returns a tuple of the previous
        expiration timestamp in seconds (if any), and whether the operation succeeded.
    """

  @abstractmethod
  def get_legacy_images_owned_by_tag(self, tag):
    """ Returns all legacy images *solely owned and used* by the given tag. """

  @abstractmethod
  def get_security_status(self, manifest_or_legacy_image):
    """ Returns the security status for the given manifest or legacy image or None if none. """

  @abstractmethod
  def reset_security_status(self, manifest_or_legacy_image):
    """ Resets the security status for the given manifest or legacy image, ensuring that it will
        get re-indexed.
    """

  @abstractmethod
  def backfill_manifest_for_tag(self, tag):
    """ Backfills a manifest for the V1 tag specified.
        If a manifest already exists for the tag, returns that manifest.

        NOTE: This method will only be necessary until we've completed the backfill, at which
        point it should be removed.
    """

  @abstractmethod
  def is_existing_disabled_namespace(self, namespace_name):
    """ Returns whether the given namespace exists and is disabled. """

  @abstractmethod
  def is_namespace_enabled(self, namespace_name):
    """ Returns whether the given namespace exists and is enabled. """

  @abstractmethod
  def get_manifest_local_blobs(self, manifest, include_placements=False):
    """ Returns the set of local blobs for the given manifest or None if none. """

  @abstractmethod
  def list_manifest_layers(self, manifest, storage, include_placements=False):
    """ Returns an *ordered list* of the layers found in the manifest, starting at the base
        and working towards the leaf, including the associated Blob and its placements
        (if specified). The layer information in `layer_info` will be of type
        `image.docker.types.ManifestImageLayer`. Should not be called for a manifest list.
    """

  @abstractmethod
  def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, storage,
                                  include_placements=False):
    """ Returns an *ordered list* of the layers found in the parsed manifest, starting at the
        base and working towards the leaf, including the associated Blob and its placements
        (if specified). The layer information in `layer_info` will be of type
        `image.docker.types.ManifestImageLayer`. Should not be called for a manifest list.
    """

  @abstractmethod
  def lookup_derived_image(self, manifest, verb, storage, varying_metadata=None,
                           include_placements=False):
    """ Looks up the derived image for the given manifest, verb and optional varying metadata
        and returns it or None if none.
    """

  @abstractmethod
  def lookup_or_create_derived_image(self, manifest, verb, storage_location, storage,
                                     varying_metadata=None, include_placements=False):
    """ Looks up the derived image for the given manifest, verb and optional varying metadata
        and returns it. If none exists, a new derived image is created.
    """
  @abstractmethod
  def get_derived_image_signature(self, derived_image, signer_name):
    """ Returns the signature associated with the derived image and a specific signer or None
        if none.
    """

  @abstractmethod
  def set_derived_image_signature(self, derived_image, signer_name, signature):
    """ Sets the calculated signature for the given derived image and signer to that specified.
    """

  @abstractmethod
  def delete_derived_image(self, derived_image):
    """ Deletes a derived image and all of its storage. """

  @abstractmethod
  def set_derived_image_size(self, derived_image, compressed_size):
    """ Sets the compressed size on the given derived image. """

  @abstractmethod
  def get_torrent_info(self, blob):
    """ Returns the torrent information associated with the given blob or None if none. """

  @abstractmethod
  def set_torrent_info(self, blob, piece_length, pieces):
    """ Sets the torrent information associated with the given blob to that specified. """

  @abstractmethod
  def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
    """ Returns the blob in the repository with the given digest, if any or None if none. Note
        that there may be multiple records in the same repository for the same blob digest, so
        the return value of this function may change.
    """

  @abstractmethod
  def create_blob_upload(self, repository_ref, upload_id, location_name, storage_metadata):
    """ Creates a new blob upload and returns a reference. If the blob upload could not be
        created, returns None. """

  @abstractmethod
  def lookup_blob_upload(self, repository_ref, blob_upload_id):
    """ Looks up the blob upload with the given ID under the specified repository and returns it
        or None if none.
    """

  @abstractmethod
  def update_blob_upload(self, blob_upload, uncompressed_byte_count, piece_hashes, piece_sha_state,
                         storage_metadata, byte_count, chunk_count, sha_state):
    """ Updates the fields of the blob upload to match those given. Returns the updated blob
        upload or None if the record does not exist.
    """

  @abstractmethod
  def delete_blob_upload(self, blob_upload):
    """ Deletes a blob upload record. """

  @abstractmethod
  def commit_blob_upload(self, blob_upload, blob_digest_str, blob_expiration_seconds):
    """ Commits the blob upload into a blob and sets an expiration before that blob will be GCed.
    """

  @abstractmethod
  def mount_blob_into_repository(self, blob, target_repository_ref, expiration_sec):
    """ Mounts the blob from another repository into the specified target repository, and adds
        an expiration before that blob is automatically GCed. This function is useful during
        push operations if an existing blob from another repository is being pushed. Returns
        False if the mounting fails. Note that this function does *not* check security for
        mounting the blob and the caller is responsible for doing this check (an example can be
        found in endpoints/v2/blob.py).
    """

  @abstractmethod
  def set_tags_expiration_for_manifest(self, manifest, expiration_sec):
    """ Sets the expiration on all tags that point to the given manifest to that specified. """

  @abstractmethod
  def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage):
    """ Returns the schema 1 version of this manifest, or None if none. """

  @abstractmethod
  def create_manifest_with_temp_tag(self, repository_ref, manifest_interface_instance,
                                    expiration_sec, storage):
    """ Creates a manifest under the repository and sets a temporary tag to point to it.
        Returns the manifest object created or None on error.
    """

  @abstractmethod
  def get_cached_namespace_region_blacklist(self, model_cache, namespace_name):
    """ Returns a cached set of ISO country codes blacklisted for pulls for the namespace
        or None if the list could not be loaded.
    """

  @abstractmethod
  def convert_manifest(self, manifest, namespace_name, repo_name, tag_name, allowed_mediatypes,
                       storage):
    """ Attempts to convert the specified manifest into a parsed manifest with a media type
        in the allowed_mediatypes set. If not possible, or an error occurs, returns None.
    """

  @abstractmethod
  def yield_tags_for_vulnerability_notification(self, layer_id_pairs):
    """ Yields tags that contain one (or more) of the given layer ID pairs, in repositories
        which have been registered for vulnerability_found notifications. Returns an iterator
        of LikelyVulnerableTag instances.
    """


@@ -0,0 +1,28 @@
import logging

from util.timedeltastring import convert_to_timedelta

logger = logging.getLogger(__name__)


def _expires_after(label_dict, manifest, model):
  """ Sets the expiration of a manifest based on the quay.expires-after label. """
  try:
    timedelta = convert_to_timedelta(label_dict['value'])
  except ValueError:
    logger.exception('Could not convert %s to timedeltastring', label_dict['value'])
    return

  total_seconds = timedelta.total_seconds()
  logger.debug('Labeling manifest %s with expiration of %s', manifest, total_seconds)
  model.set_tags_expiration_for_manifest(manifest, total_seconds)


_LABEL_HANDLERS = {
  'quay.expires-after': _expires_after,
}


def apply_label_to_manifest(label_dict, manifest, model):
  """ Runs the handler defined, if any, for the given label. """
  handler = _LABEL_HANDLERS.get(label_dict['key'])
  if handler is not None:
    handler(label_dict, manifest, model)
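For instance, the label-creation path might invoke the handler like this (the manifest and model objects are assumed to come from the caller, and this presumes convert_to_timedelta accepts the '2w' shorthand):

label = {'key': 'quay.expires-after', 'value': '2w'}
apply_label_to_manifest(label, manifest, registry_model)
# '2w' converts to 14 days, so every tag pointing at the manifest is set
# to expire 1209600 seconds after its creation.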


@@ -0,0 +1,220 @@
import logging
import json
import uuid

from collections import namedtuple

from flask import session

from data import model
from data.database import db_transaction, ImageStorage, ImageStoragePlacement
from data.registry_model import registry_model
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST

logger = logging.getLogger(__name__)

ManifestLayer = namedtuple('ManifestLayer', ['layer_id', 'v1_metadata_string', 'db_id'])
_BuilderState = namedtuple('_BuilderState', ['builder_id', 'images', 'tags', 'checksums',
                                             'temp_storages'])

_SESSION_KEY = '__manifestbuilder'


def create_manifest_builder(repository_ref, storage, legacy_signing_key):
  """ Creates a new manifest builder for populating manifests under the specified repository
      and returns it. Returns None if the builder could not be constructed.
  """
  builder_id = str(uuid.uuid4())
  builder = _ManifestBuilder(repository_ref, _BuilderState(builder_id, {}, {}, {}, []), storage,
                             legacy_signing_key)
  builder._save_to_session()
  return builder


def lookup_manifest_builder(repository_ref, builder_id, storage, legacy_signing_key):
  """ Looks up the manifest builder with the given ID under the specified repository and returns
      it or None if none.
  """
  builder_state_tuple = session.get(_SESSION_KEY)
  if builder_state_tuple is None:
    return None

  builder_state = _BuilderState(*builder_state_tuple)
  if builder_state.builder_id != builder_id:
    return None

  return _ManifestBuilder(repository_ref, builder_state, storage, legacy_signing_key)
class _ManifestBuilder(object):
  """ Helper class which provides an interface for bookkeeping the layers and configuration of
      manifests being constructed.
  """
  def __init__(self, repository_ref, builder_state, storage, legacy_signing_key):
    self._repository_ref = repository_ref
    self._builder_state = builder_state
    self._storage = storage
    self._legacy_signing_key = legacy_signing_key

  @property
  def builder_id(self):
    """ Returns the unique ID for this builder. """
    return self._builder_state.builder_id

  @property
  def committed_tags(self):
    """ Returns the tags committed by this builder, if any. """
    return [registry_model.get_repo_tag(self._repository_ref, tag_name, include_legacy_image=True)
            for tag_name in self._builder_state.tags.keys()]

  def start_layer(self, layer_id, v1_metadata_string, location_name, calling_user,
                  temp_tag_expiration):
    """ Starts a new layer with the given ID to be placed into a manifest. Returns the layer
        started or None if an error occurred.
    """
    # Ensure the repository still exists.
    repository = model.repository.lookup_repository(self._repository_ref._db_id)
    if repository is None:
      return None

    namespace_name = repository.namespace_user.username
    repo_name = repository.name

    try:
      v1_metadata = json.loads(v1_metadata_string)
    except (ValueError, TypeError):
      logger.exception('Exception when trying to parse V1 metadata JSON for layer %s', layer_id)
      return None

    # Sanity check that the ID matches the v1 metadata.
    if layer_id != v1_metadata['id']:
      return None

    # Ensure the parent already exists in the repository.
    parent_id = v1_metadata.get('parent', None)
    parent_image = None

    if parent_id is not None:
      parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_id)
      if parent_image is None:
        return None

    # Check to see if this layer already exists in the repository. If so, we can skip the
    # creation.
    existing_image = registry_model.get_legacy_image(self._repository_ref, layer_id)
    if existing_image is not None:
      self._builder_state.images[layer_id] = existing_image.id
      self._save_to_session()
      return ManifestLayer(layer_id, v1_metadata_string, existing_image.id)

    with db_transaction():
      # Otherwise, create a new legacy image and point a temporary tag at it.
      created = model.image.find_create_or_link_image(layer_id, repository, calling_user, {},
                                                      location_name)
      model.tag.create_temporary_hidden_tag(repository, created, temp_tag_expiration)

      # Save its V1 metadata.
      command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
      command = json.dumps(command_list) if command_list else None

      model.image.set_image_metadata(layer_id, namespace_name, repo_name,
                                     v1_metadata.get('created'),
                                     v1_metadata.get('comment'),
                                     command, v1_metadata_string,
                                     parent=parent_image)

    # Save the changes to the builder.
    self._builder_state.images[layer_id] = created.id
    self._save_to_session()

    return ManifestLayer(layer_id, v1_metadata_string, created.id)
  def lookup_layer(self, layer_id):
    """ Returns a layer with the given ID under this builder. If none exists, returns None. """
    if layer_id not in self._builder_state.images:
      return None

    image = model.image.get_image_by_db_id(self._builder_state.images[layer_id])
    if image is None:
      return None

    return ManifestLayer(layer_id, image.v1_json_metadata, image.id)

  def assign_layer_blob(self, layer, blob, computed_checksums):
    """ Assigns a blob to a layer. """
    assert blob
    assert not blob.uploading

    repo_image = model.image.get_image_by_db_id(layer.db_id)
    if repo_image is None:
      return None

    with db_transaction():
      existing_storage = repo_image.storage
      repo_image.storage = blob._db_id
      repo_image.save()

      if existing_storage.uploading:
        self._builder_state.temp_storages.append(existing_storage.id)

    self._builder_state.checksums[layer.layer_id] = computed_checksums
    self._save_to_session()
    return True

  def validate_layer_checksum(self, layer, checksum):
    """ Returns whether the checksum for a layer matches that specified. """
    return checksum in self.get_layer_checksums(layer)

  def get_layer_checksums(self, layer):
    """ Returns the checksums registered for the layer, if any. """
    return self._builder_state.checksums.get(layer.layer_id) or []

  def save_precomputed_checksum(self, layer, checksum):
    """ Saves a precomputed checksum for a layer. """
    checksums = self._builder_state.checksums.get(layer.layer_id) or []
    checksums.append(checksum)
    self._builder_state.checksums[layer.layer_id] = checksums
    self._save_to_session()

  def commit_tag_and_manifest(self, tag_name, layer):
    """ Commits a new tag + manifest for that tag to the repository with the given name,
        pointing to the given layer.
    """
    legacy_image = registry_model.get_legacy_image(self._repository_ref, layer.layer_id)
    if legacy_image is None:
      return None

    tag = registry_model.retarget_tag(self._repository_ref, tag_name, legacy_image, self._storage,
                                      self._legacy_signing_key)
    if tag is None:
      return None

    self._builder_state.tags[tag_name] = tag._db_id
    self._save_to_session()
    return tag

  def done(self):
    """ Marks the manifest builder as complete and disposes of any state. This call is optional
        and it is expected manifest builders will eventually time out if unused for an
        extended period of time.
    """
    temp_storages = self._builder_state.temp_storages
    for storage_id in temp_storages:
      try:
        storage = ImageStorage.get(id=storage_id)
        if storage.uploading and storage.content_checksum != EMPTY_LAYER_BLOB_DIGEST:
          # Delete all the placements pointing to the storage.
          ImageStoragePlacement.delete().where(ImageStoragePlacement.storage == storage).execute()

          # Delete the storage.
          storage.delete_instance()
      except ImageStorage.DoesNotExist:
        pass

    session.pop(_SESSION_KEY, None)

  def _save_to_session(self):
    session[_SESSION_KEY] = self._builder_state
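The expected lifecycle across a V1 push, sketched with illustrative identifiers (storage, signing_key, user, blob and the checksum are assumed to come from the request context):

builder = create_manifest_builder(repo_ref, storage, signing_key)
layer = builder.start_layer('abc123', v1_json, 'local_us', user, temp_tag_expiration=300)
builder.assign_layer_blob(layer, blob, [checksum])
builder.commit_tag_and_manifest('latest', layer)
builder.done()
# Because _BuilderState round-trips through the Flask session, a later
# request can resume via lookup_manifest_builder(repo_ref, builder.builder_id,
# storage, signing_key) before calling done().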


@ -0,0 +1,112 @@
import inspect
import logging
import hashlib
from data.database import DerivedStorageForImage, TagManifest, Manifest, Image
from data.registry_model.registry_oci_model import back_compat_oci_model, oci_model
from data.registry_model.registry_pre_oci_model import pre_oci_model
from data.registry_model.datatypes import LegacyImage, Manifest as ManifestDataType
logger = logging.getLogger(__name__)
class SplitModel(object):
def __init__(self, oci_model_proportion, oci_namespace_whitelist, v22_namespace_whitelist,
oci_only_mode):
self.v22_namespace_whitelist = set(v22_namespace_whitelist)
self.oci_namespace_whitelist = set(oci_namespace_whitelist)
self.oci_namespace_whitelist.update(v22_namespace_whitelist)
self.oci_model_proportion = oci_model_proportion
self.oci_only_mode = oci_only_mode
def supports_schema2(self, namespace_name):
""" Returns whether the implementation of the data interface supports schema 2 format
manifests. """
return namespace_name in self.v22_namespace_whitelist
def _namespace_from_kwargs(self, args_dict):
if 'namespace_name' in args_dict:
return args_dict['namespace_name']
if 'repository_ref' in args_dict:
return args_dict['repository_ref'].namespace_name
if 'tag' in args_dict:
return args_dict['tag'].repository.namespace_name
if 'manifest' in args_dict:
manifest = args_dict['manifest']
if manifest._is_tag_manifest:
return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username
else:
return Manifest.get(id=manifest._db_id).repository.namespace_user.username
if 'manifest_or_legacy_image' in args_dict:
manifest_or_legacy_image = args_dict['manifest_or_legacy_image']
if isinstance(manifest_or_legacy_image, LegacyImage):
return Image.get(id=manifest_or_legacy_image._db_id).repository.namespace_user.username
else:
manifest = manifest_or_legacy_image
if manifest._is_tag_manifest:
return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username
else:
return Manifest.get(id=manifest._db_id).repository.namespace_user.username
if 'derived_image' in args_dict:
return (DerivedStorageForImage
.get(id=args_dict['derived_image']._db_id)
.source_image
.repository
.namespace_user
.username)
if 'blob' in args_dict:
return '' # Blob functions are shared, so no need to do anything.
if 'blob_upload' in args_dict:
return '' # Blob functions are shared, so no need to do anything.
raise Exception('Unknown namespace for dict `%s`' % args_dict)
def __getattr__(self, attr):
def method(*args, **kwargs):
if self.oci_model_proportion >= 1.0:
if self.oci_only_mode:
logger.debug('Calling method `%s` under full OCI data model for all namespaces', attr)
return getattr(oci_model, attr)(*args, **kwargs)
else:
logger.debug('Calling method `%s` under compat OCI data model for all namespaces', attr)
return getattr(back_compat_oci_model, attr)(*args, **kwargs)
argnames = inspect.getargspec(getattr(back_compat_oci_model, attr))[0]
if not argnames and isinstance(args[0], ManifestDataType):
args_dict = dict(manifest=args[0])
else:
args_dict = {argnames[index + 1]: value for index, value in enumerate(args)}
if attr in ['yield_tags_for_vulnerability_notification', 'get_most_recent_tag_lifetime_start']:
use_oci = self.oci_model_proportion >= 1.0
namespace_name = '(implicit for ' + attr + ')'
else:
namespace_name = self._namespace_from_kwargs(args_dict)
use_oci = namespace_name in self.oci_namespace_whitelist
if not use_oci and self.oci_model_proportion:
# Hash the namespace name and see if it falls into the proportion bucket.
bucket = (int(hashlib.md5(namespace_name).hexdigest(), 16) % 100)
if bucket <= int(self.oci_model_proportion * 100):
logger.debug('Enabling OCI for namespace `%s` in proportional bucket',
namespace_name)
use_oci = True
if use_oci:
logger.debug('Calling method `%s` under OCI data model for namespace `%s`',
attr, namespace_name)
return getattr(back_compat_oci_model, attr)(*args, **kwargs)
else:
return getattr(pre_oci_model, attr)(*args, **kwargs)
return method
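# Illustrative dispatch with hypothetical construction values:
#
#   split = SplitModel(0.25, ['oci-ns'], ['v22-ns'], oci_only_mode=False)
#   split.get_repo_tag(repository_ref, 'latest')
#
# routes to back_compat_oci_model when the resolved namespace is whitelisted
# ('oci-ns' or 'v22-ns') or when md5(namespace) % 100 falls at or below
# int(0.25 * 100) == 25 (roughly a quarter of the 100 hash buckets), and to
# pre_oci_model otherwise.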

View file

@@ -0,0 +1,668 @@
# pylint: disable=protected-access
import logging
from contextlib import contextmanager
from peewee import fn
from data import database
from data import model
from data.model import oci, DataModelException
from data.model.oci.retriever import RepositoryContentRetriever
from data.database import db_transaction, Image, IMAGE_NOT_SCANNED_ENGINE_VERSION
from data.registry_model.interface import RegistryDataInterface
from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus,
Blob, ShallowTag, LikelyVulnerableTag)
from data.registry_model.shared import SharedModel
from data.registry_model.label_handlers import apply_label_to_manifest
from image.docker import ManifestException
from image.docker.schema1 import DOCKER_SCHEMA1_CONTENT_TYPES
from image.docker.schema2 import DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
logger = logging.getLogger(__name__)
class OCIModel(SharedModel, RegistryDataInterface):
"""
OCIModel implements the data model for the registry API using a database schema
after it was changed to support the OCI specification.
"""
def __init__(self, oci_model_only=True):
self.oci_model_only = oci_model_only
def supports_schema2(self, namespace_name):
""" Returns whether the implementation of the data interface supports schema 2 format
manifests. """
return True
def get_tag_legacy_image_id(self, repository_ref, tag_name, storage):
""" Returns the legacy image ID for the tag with a legacy images in
the repository. Returns None if None.
"""
tag = self.get_repo_tag(repository_ref, tag_name, include_legacy_image=True)
if tag is None:
return None
if tag.legacy_image_if_present is not None:
return tag.legacy_image_if_present.docker_image_id
if tag.manifest.media_type == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE:
# See if we can lookup a schema1 legacy image.
v1_compatible = self.get_schema1_parsed_manifest(tag.manifest, '', '', '', storage)
if v1_compatible is not None:
return v1_compatible.leaf_layer_v1_image_id
return None
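# Note: the empty strings passed to get_schema1_parsed_manifest stand in for
# the namespace, repo and tag names, which presumably do not affect the leaf
# layer's synthesized v1 image ID.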
def get_legacy_tags_map(self, repository_ref, storage):
""" Returns a map from tag name to its legacy image ID, for all tags with legacy images in
the repository. Note that this can be a *very* heavy operation.
"""
tags = oci.tag.list_alive_tags(repository_ref._db_id)
legacy_images_map = oci.tag.get_legacy_images_for_tags(tags)
tags_map = {}
for tag in tags:
legacy_image = legacy_images_map.get(tag.id)
if legacy_image is not None:
tags_map[tag.name] = legacy_image.docker_image_id
else:
manifest = Manifest.for_manifest(tag.manifest, None)
if legacy_image is None and manifest.media_type == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE:
# See if we can lookup a schema1 legacy image.
v1_compatible = self.get_schema1_parsed_manifest(manifest, '', '', '', storage)
if v1_compatible is not None:
v1_id = v1_compatible.leaf_layer_v1_image_id
if v1_id is not None:
tags_map[tag.name] = v1_id
return tags_map
def _get_legacy_compatible_image_for_manifest(self, manifest, storage):
# Check for a legacy image directly on the manifest.
if manifest.media_type != DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE:
return oci.shared.get_legacy_image_for_manifest(manifest._db_id)
# Otherwise, lookup a legacy image associated with the v1-compatible manifest
# in the list.
try:
manifest_obj = database.Manifest.get(id=manifest._db_id)
except database.Manifest.DoesNotExist:
logger.exception('Could not find manifest for manifest `%s`', manifest._db_id)
return None
# See if we can lookup a schema1 legacy image.
v1_compatible = self.get_schema1_parsed_manifest(manifest, '', '', '', storage)
if v1_compatible is None:
return None
v1_id = v1_compatible.leaf_layer_v1_image_id
if v1_id is None:
return None
return model.image.get_image(manifest_obj.repository_id, v1_id)
def find_matching_tag(self, repository_ref, tag_names):
""" Finds an alive tag in the repository matching one of the given tag names and returns it
or None if none.
"""
found_tag = oci.tag.find_matching_tag(repository_ref._db_id, tag_names)
assert found_tag is None or not found_tag.hidden
return Tag.for_tag(found_tag)
def get_most_recent_tag(self, repository_ref):
""" Returns the most recently pushed alive tag in the repository, if any. If none, returns
None.
"""
found_tag = oci.tag.get_most_recent_tag(repository_ref._db_id)
assert found_tag is None or not found_tag.hidden
return Tag.for_tag(found_tag)
def get_manifest_for_tag(self, tag, backfill_if_necessary=False, include_legacy_image=False):
""" Returns the manifest associated with the given tag. """
legacy_image = None
if include_legacy_image:
legacy_image = oci.shared.get_legacy_image_for_manifest(tag._manifest)
return Manifest.for_manifest(tag._manifest, LegacyImage.for_image(legacy_image))
def lookup_manifest_by_digest(self, repository_ref, manifest_digest, allow_dead=False,
include_legacy_image=False, require_available=False):
""" Looks up the manifest with the given digest under the given repository and returns it
or None if none. """
manifest = oci.manifest.lookup_manifest(repository_ref._db_id, manifest_digest,
allow_dead=allow_dead,
require_available=require_available)
if manifest is None:
return None
legacy_image = None
if include_legacy_image:
try:
legacy_image_id = database.ManifestLegacyImage.get(manifest=manifest).image.docker_image_id
legacy_image = self.get_legacy_image(repository_ref, legacy_image_id, include_parents=True)
except database.ManifestLegacyImage.DoesNotExist:
pass
return Manifest.for_manifest(manifest, legacy_image)
def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None):
""" Creates a label on the manifest with the given key and value. """
label_data = dict(key=key, value=value, source_type_name=source_type_name,
media_type_name=media_type_name)
# Create the label itself.
label = oci.label.create_manifest_label(manifest._db_id, key, value, source_type_name,
media_type_name,
adjust_old_model=not self.oci_model_only)
if label is None:
return None
# Apply any changes to the manifest that the label prescribes.
apply_label_to_manifest(label_data, manifest, self)
return Label.for_label(label)
@contextmanager
def batch_create_manifest_labels(self, manifest):
""" Returns a context manager for batch creation of labels on a manifest.
Can raise InvalidLabelKeyException or InvalidMediaTypeException depending
on the validation errors.
"""
labels_to_add = []
def add_label(key, value, source_type_name, media_type_name=None):
labels_to_add.append(dict(key=key, value=value, source_type_name=source_type_name,
media_type_name=media_type_name))
yield add_label
# TODO: make this truly batch once we've fully transitioned to V2_2 and no longer need
# the mapping tables.
for label_data in labels_to_add:
with db_transaction():
# Create the label itself.
oci.label.create_manifest_label(manifest._db_id, **label_data)
# Apply any changes to the manifest that the label prescribes.
apply_label_to_manifest(label_data, manifest, self)
def list_manifest_labels(self, manifest, key_prefix=None):
""" Returns all labels found on the manifest. If specified, the key_prefix will filter the
labels returned to those keys that start with the given prefix.
"""
labels = oci.label.list_manifest_labels(manifest._db_id, prefix_filter=key_prefix)
return [Label.for_label(l) for l in labels]
def get_manifest_label(self, manifest, label_uuid):
""" Returns the label with the specified UUID on the manifest or None if none. """
return Label.for_label(oci.label.get_manifest_label(label_uuid, manifest._db_id))
def delete_manifest_label(self, manifest, label_uuid):
""" Delete the label with the specified UUID on the manifest. Returns the label deleted
or None if none.
"""
return Label.for_label(oci.label.delete_manifest_label(label_uuid, manifest._db_id))
def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit):
"""
Returns a page of active tags in a repository. Note that the tags returned by this method
are ShallowTag objects, which only contain the tag name.
"""
tags = oci.tag.lookup_alive_tags_shallow(repository_ref._db_id, start_pagination_id, limit)
return [ShallowTag.for_tag(tag) for tag in tags]
def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False):
"""
Returns a list of all the active tags in the repository. Note that this is a *HEAVY*
operation on repositories with a lot of tags, and should only be used for testing or
where other more specific operations are not possible.
"""
tags = list(oci.tag.list_alive_tags(repository_ref._db_id))
legacy_images_map = {}
if include_legacy_images:
legacy_images_map = oci.tag.get_legacy_images_for_tags(tags)
return [Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_images_map.get(tag.id)))
for tag in tags]
def list_repository_tag_history(self, repository_ref, page=1, size=100, specific_tag_name=None,
active_tags_only=False, since_time_ms=None):
"""
Returns the history of all tags in the repository (unless filtered). This includes tags that
have been made inactive due to newer versions of those tags coming into service.
"""
tags, has_more = oci.tag.list_repository_tag_history(repository_ref._db_id,
page, size,
specific_tag_name,
active_tags_only,
since_time_ms)
# TODO: do we need legacy images here?
legacy_images_map = oci.tag.get_legacy_images_for_tags(tags)
return [Tag.for_tag(tag, LegacyImage.for_image(legacy_images_map.get(tag.id))) for tag in tags], has_more
def has_expired_tag(self, repository_ref, tag_name):
"""
Returns true if and only if the repository contains a tag with the given name that is expired.
"""
return bool(oci.tag.get_expired_tag(repository_ref._db_id, tag_name))
def get_most_recent_tag_lifetime_start(self, repository_refs):
"""
Returns a map from repository ID to the last modified time (in s) for each repository in the
given repository reference list.
"""
if not repository_refs:
return {}
to_seconds = lambda ms: ms / 1000 if ms is not None else None
last_modified = oci.tag.get_most_recent_tag_lifetime_start([r.id for r in repository_refs])
return {repo_id: to_seconds(ms) for repo_id, ms in last_modified.items()}
def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False):
"""
Returns the latest, *active* tag found in the repository, with the matching name
or None if none.
"""
assert isinstance(tag_name, basestring)
tag = oci.tag.get_tag(repository_ref._db_id, tag_name)
if tag is None:
return None
legacy_image = None
if include_legacy_image:
legacy_images = oci.tag.get_legacy_images_for_tags([tag])
legacy_image = legacy_images.get(tag.id)
return Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_image))
def create_manifest_and_retarget_tag(self, repository_ref, manifest_interface_instance, tag_name,
storage, raise_on_error=False):
""" Creates a manifest in a repository, adding all of the necessary data in the model.
The `manifest_interface_instance` parameter must be an instance of the manifest
interface as returned by the image/docker package.
Note that all blobs referenced by the manifest must exist under the repository or this
method will fail and return None.
Returns a reference to the (created manifest, tag) or (None, None) on error, unless
raise_on_error is set to True, in which case a CreateManifestException may also be
raised.
"""
# Get or create the manifest itself.
created_manifest = oci.manifest.get_or_create_manifest(repository_ref._db_id,
manifest_interface_instance,
storage,
for_tagging=True,
raise_on_error=raise_on_error)
if created_manifest is None:
return (None, None)
# Re-target the tag to it.
tag = oci.tag.retarget_tag(tag_name, created_manifest.manifest,
adjust_old_model=not self.oci_model_only)
if tag is None:
return (None, None)
legacy_image = oci.shared.get_legacy_image_for_manifest(created_manifest.manifest)
li = LegacyImage.for_image(legacy_image)
wrapped_manifest = Manifest.for_manifest(created_manifest.manifest, li)
# Apply any labels that should modify the created tag.
if created_manifest.labels_to_apply:
for key, value in created_manifest.labels_to_apply.iteritems():
apply_label_to_manifest(dict(key=key, value=value), wrapped_manifest, self)
# Reload the tag in case any updates were applied.
tag = database.Tag.get(id=tag.id)
return (wrapped_manifest, Tag.for_tag(tag, li))
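# Hypothetical usage, assuming a parsed schema2 manifest whose referenced
# blobs have already been uploaded to the repository:
#
#   manifest, tag = oci_model.create_manifest_and_retarget_tag(
#       repository_ref, parsed_manifest, 'latest', storage)
#
# Both sides of the pair are None on failure; a missing referenced blob is the
# typical cause, per the docstring above.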
def retarget_tag(self, repository_ref, tag_name, manifest_or_legacy_image, storage,
legacy_manifest_key, is_reversion=False):
"""
Creates, updates or moves a tag to a new entry in history, pointing to the manifest or
legacy image specified. If is_reversion is set to True, this operation is considered a
reversion over a previous tag move operation. Returns the updated Tag or None on error.
"""
assert legacy_manifest_key is not None
manifest_id = manifest_or_legacy_image._db_id
if isinstance(manifest_or_legacy_image, LegacyImage):
# If a legacy image was required, build a new manifest for it and move the tag to that.
try:
image_row = database.Image.get(id=manifest_or_legacy_image._db_id)
except database.Image.DoesNotExist:
return None
manifest_instance = self._build_manifest_for_legacy_image(tag_name, image_row)
if manifest_instance is None:
return None
created = oci.manifest.get_or_create_manifest(repository_ref._db_id, manifest_instance,
storage)
if created is None:
return None
manifest_id = created.manifest.id
else:
# If the manifest is a schema 1 manifest and its tag name does not match that
# specified, then we need to create a new manifest, but with that tag name.
if manifest_or_legacy_image.media_type in DOCKER_SCHEMA1_CONTENT_TYPES:
try:
parsed = manifest_or_legacy_image.get_parsed_manifest()
except ManifestException:
logger.exception('Could not parse manifest `%s` in retarget_tag',
manifest_or_legacy_image._db_id)
return None
if parsed.tag != tag_name:
logger.debug('Rewriting manifest `%s` for tag named `%s`',
manifest_or_legacy_image._db_id, tag_name)
repository_id = repository_ref._db_id
updated = parsed.with_tag_name(tag_name, legacy_manifest_key)
assert updated.is_signed
created = oci.manifest.get_or_create_manifest(repository_id, updated, storage)
if created is None:
return None
manifest_id = created.manifest.id
tag = oci.tag.retarget_tag(tag_name, manifest_id, is_reversion=is_reversion)
legacy_image = LegacyImage.for_image(oci.shared.get_legacy_image_for_manifest(manifest_id))
return Tag.for_tag(tag, legacy_image)
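# Schema 1 manifests embed the tag name in their signed payload, which is why a
# mismatched tag name above requires rewriting the manifest via with_tag_name
# and re-signing it with legacy_manifest_key before the tag row is moved.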
def delete_tag(self, repository_ref, tag_name):
"""
Deletes the latest, *active* tag with the given name in the repository.
"""
deleted_tag = oci.tag.delete_tag(repository_ref._db_id, tag_name)
if deleted_tag is None:
# TODO: This is only needed because preoci raises an exception. Remove and fix
# expected status codes once PreOCIModel is gone.
msg = ('Invalid repository tag \'%s\' on repository' % tag_name)
raise DataModelException(msg)
return Tag.for_tag(deleted_tag)
def delete_tags_for_manifest(self, manifest):
"""
Deletes all tags pointing to the given manifest, making the manifest inaccessible for pulling.
Returns the tags deleted, if any. Returns None on error.
"""
deleted_tags = oci.tag.delete_tags_for_manifest(manifest._db_id)
return [Tag.for_tag(tag) for tag in deleted_tags]
def change_repository_tag_expiration(self, tag, expiration_date):
""" Sets the expiration date of the tag under the matching repository to that given. If the
expiration date is None, then the tag will not expire. Returns a tuple of the previous
expiration timestamp in seconds (if any), and whether the operation succeeded.
"""
return oci.tag.change_tag_expiration(tag._db_id, expiration_date)
def get_legacy_images_owned_by_tag(self, tag):
""" Returns all legacy images *solely owned and used* by the given tag. """
tag_obj = oci.tag.get_tag_by_id(tag._db_id)
if tag_obj is None:
return None
tags = oci.tag.list_alive_tags(tag_obj.repository_id)
legacy_images = oci.tag.get_legacy_images_for_tags(tags)
tag_legacy_image = legacy_images.get(tag._db_id)
if tag_legacy_image is None:
return None
assert isinstance(tag_legacy_image, Image)
# Collect the IDs of all images that the tag uses.
tag_image_ids = set()
tag_image_ids.add(tag_legacy_image.id)
tag_image_ids.update(tag_legacy_image.ancestor_id_list())
# Remove any images shared by other tags.
for current in tags:
if current == tag_obj:
continue
current_image = legacy_images.get(current.id)
if current_image is None:
continue
tag_image_ids.discard(current_image.id)
tag_image_ids = tag_image_ids.difference(current_image.ancestor_id_list())
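# Once the set is empty, every remaining image is shared with another tag, so
# the computation can stop early (the same check also runs after the loop).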
if not tag_image_ids:
return []
if not tag_image_ids:
return []
# Load the images we need to return.
images = database.Image.select().where(database.Image.id << list(tag_image_ids))
all_image_ids = set()
for image in images:
all_image_ids.add(image.id)
all_image_ids.update(image.ancestor_id_list())
# Build a map of all the images and their parents.
images_map = {}
all_images = database.Image.select().where(database.Image.id << list(all_image_ids))
for image in all_images:
images_map[image.id] = image
return [LegacyImage.for_image(image, images_map=images_map) for image in images]
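# Worked example with hypothetical IDs: tag T points at image C with ancestors
# [A, B], so tag_image_ids starts as {A, B, C}; if another alive tag points at
# image B (ancestors [A]), then B is discarded and {A} subtracted, leaving {C}
# as the only image solely owned by T.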
def get_security_status(self, manifest_or_legacy_image):
""" Returns the security status for the given manifest or legacy image or None if none. """
image = None
if isinstance(manifest_or_legacy_image, Manifest):
image = oci.shared.get_legacy_image_for_manifest(manifest_or_legacy_image._db_id)
if image is None:
return SecurityScanStatus.UNSUPPORTED
else:
try:
image = database.Image.get(id=manifest_or_legacy_image._db_id)
except database.Image.DoesNotExist:
return None
if image.security_indexed_engine is not None and image.security_indexed_engine >= 0:
return SecurityScanStatus.SCANNED if image.security_indexed else SecurityScanStatus.FAILED
return SecurityScanStatus.QUEUED
def reset_security_status(self, manifest_or_legacy_image):
""" Resets the security status for the given manifest or legacy image, ensuring that it will
get re-indexed.
"""
image = None
if isinstance(manifest_or_legacy_image, Manifest):
image = oci.shared.get_legacy_image_for_manifest(manifest_or_legacy_image._db_id)
if image is None:
return None
else:
try:
image = database.Image.get(id=manifest_or_legacy_image._db_id)
except database.Image.DoesNotExist:
return None
assert image
image.security_indexed = False
image.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION
image.save()
def backfill_manifest_for_tag(self, tag):
""" Backfills a manifest for the V1 tag specified.
If a manifest already exists for the tag, returns that manifest.
NOTE: This method will only be necessary until we've completed the backfill, at which point
it should be removed.
"""
# Nothing to do for OCI tags.
manifest = tag.manifest
if manifest is None:
return None
legacy_image = oci.shared.get_legacy_image_for_manifest(manifest)
return Manifest.for_manifest(manifest, LegacyImage.for_image(legacy_image))
def list_manifest_layers(self, manifest, storage, include_placements=False):
try:
manifest_obj = database.Manifest.get(id=manifest._db_id)
except database.Manifest.DoesNotExist:
logger.exception('Could not find manifest for manifest `%s`', manifest._db_id)
return None
try:
parsed = manifest.get_parsed_manifest()
except ManifestException:
logger.exception('Could not parse and validate manifest `%s`', manifest._db_id)
return None
return self._list_manifest_layers(manifest_obj.repository_id, parsed, storage,
include_placements, by_manifest=True)
def lookup_derived_image(self, manifest, verb, storage, varying_metadata=None,
include_placements=False):
"""
Looks up the derived image for the given manifest, verb and optional varying metadata and
returns it or None if none.
"""
legacy_image = self._get_legacy_compatible_image_for_manifest(manifest, storage)
if legacy_image is None:
return None
derived = model.image.find_derived_storage_for_image(legacy_image, verb, varying_metadata)
return self._build_derived(derived, verb, varying_metadata, include_placements)
def lookup_or_create_derived_image(self, manifest, verb, storage_location, storage,
varying_metadata=None,
include_placements=False):
"""
Looks up the derived image for the given manifest, verb and optional varying metadata
and returns it. If none exists, a new derived image is created.
"""
legacy_image = self._get_legacy_compatible_image_for_manifest(manifest, storage)
if legacy_image is None:
return None
derived = model.image.find_or_create_derived_storage(legacy_image, verb, storage_location,
varying_metadata)
return self._build_derived(derived, verb, varying_metadata, include_placements)
def set_tags_expiration_for_manifest(self, manifest, expiration_sec):
"""
Sets the expiration on all tags that point to the given manifest to that specified.
"""
oci.tag.set_tag_expiration_sec_for_manifest(manifest._db_id, expiration_sec)
def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage):
""" Returns the schema 1 manifest for this manifest, or None if none. """
try:
parsed = manifest.get_parsed_manifest()
except ManifestException:
return None
try:
manifest_row = database.Manifest.get(id=manifest._db_id)
except database.Manifest.DoesNotExist:
return None
retriever = RepositoryContentRetriever(manifest_row.repository_id, storage)
return parsed.get_schema1_manifest(namespace_name, repo_name, tag_name, retriever)
def convert_manifest(self, manifest, namespace_name, repo_name, tag_name, allowed_mediatypes,
storage):
try:
parsed = manifest.get_parsed_manifest()
except ManifestException:
return None
try:
manifest_row = database.Manifest.get(id=manifest._db_id)
except database.Manifest.DoesNotExist:
return None
retriever = RepositoryContentRetriever(manifest_row.repository_id, storage)
return parsed.convert_manifest(allowed_mediatypes, namespace_name, repo_name, tag_name,
retriever)
def create_manifest_with_temp_tag(self, repository_ref, manifest_interface_instance,
expiration_sec, storage):
""" Creates a manifest under the repository and sets a temporary tag to point to it.
Returns the manifest object created or None on error.
"""
# Get or create the manifest itself. get_or_create_manifest will take care of the
# temporary tag work.
created_manifest = oci.manifest.get_or_create_manifest(repository_ref._db_id,
manifest_interface_instance,
storage,
temp_tag_expiration_sec=expiration_sec)
if created_manifest is None:
return None
legacy_image = oci.shared.get_legacy_image_for_manifest(created_manifest.manifest)
li = LegacyImage.for_image(legacy_image)
return Manifest.for_manifest(created_manifest.manifest, li)
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
"""
Returns the blob in the repository with the given digest, if any, or None if none. Note that
there may be multiple records in the same repository for the same blob digest, so the return
value of this function may change.
"""
image_storage = self._get_shared_storage(blob_digest)
if image_storage is None:
image_storage = oci.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
if image_storage is None:
return None
assert image_storage.cas_path is not None
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(image_storage.uuid))
return Blob.for_image_storage(image_storage,
storage_path=model.storage.get_layer_path(image_storage),
placements=placements)
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, storage,
include_placements=False):
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
and working towards the leaf, including the associated Blob and its placements
(if specified).
"""
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, storage,
include_placements=include_placements,
by_manifest=True)
def get_manifest_local_blobs(self, manifest, include_placements=False):
""" Returns the set of local blobs for the given manifest or None if none. """
try:
manifest_row = database.Manifest.get(id=manifest._db_id)
except database.Manifest.DoesNotExist:
return None
return self._get_manifest_local_blobs(manifest, manifest_row.repository_id, include_placements,
by_manifest=True)
def yield_tags_for_vulnerability_notification(self, layer_id_pairs):
""" Yields tags that contain one (or more) of the given layer ID pairs, in repositories
which have been registered for vulnerability_found notifications. Returns an iterator
of LikelyVulnerableTag instances.
"""
for docker_image_id, storage_uuid in layer_id_pairs:
tags = oci.tag.lookup_notifiable_tags_for_legacy_image(docker_image_id, storage_uuid,
'vulnerability_found')
for tag in tags:
yield LikelyVulnerableTag.for_tag(tag, tag.repository, docker_image_id, storage_uuid)
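# Two module-level instances are exported below: oci_model writes only the
# new-style OCI rows, while back_compat_oci_model (oci_model_only=False) also
# keeps the legacy mapping tables in sync via adjust_old_model; SplitModel
# routes through the latter during the migration window.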
oci_model = OCIModel()
back_compat_oci_model = OCIModel(oci_model_only=False)

View file

@@ -0,0 +1,694 @@
# pylint: disable=protected-access
import logging
from contextlib import contextmanager
from peewee import IntegrityError, fn
from data import database
from data import model
from data.database import db_transaction, IMAGE_NOT_SCANNED_ENGINE_VERSION
from data.registry_model.interface import RegistryDataInterface
from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus,
Blob, RepositoryReference, ShallowTag,
LikelyVulnerableTag)
from data.registry_model.shared import SharedModel
from data.registry_model.label_handlers import apply_label_to_manifest
from image.docker.schema1 import ManifestException, DockerSchema1Manifest
from util.validation import is_json
logger = logging.getLogger(__name__)
class PreOCIModel(SharedModel, RegistryDataInterface):
"""
PreOCIModel implements the data model for the registry API using a database schema
before it was changed to support the OCI specification.
"""
def supports_schema2(self, namespace_name):
""" Returns whether the implementation of the data interface supports schema 2 format
manifests. """
return False
def get_tag_legacy_image_id(self, repository_ref, tag_name, storage):
""" Returns the legacy image ID for the tag with a legacy images in
the repository. Returns None if None.
"""
tag = self.get_repo_tag(repository_ref, tag_name, include_legacy_image=True)
if tag is None:
return None
return tag.legacy_image.docker_image_id
def get_legacy_tags_map(self, repository_ref, storage):
""" Returns a map from tag name to its legacy image, for all tags with legacy images in
the repository.
"""
tags = self.list_all_active_repository_tags(repository_ref, include_legacy_images=True)
return {tag.name: tag.legacy_image.docker_image_id for tag in tags}
def find_matching_tag(self, repository_ref, tag_names):
""" Finds an alive tag in the repository matching one of the given tag names and returns it
or None if none.
"""
found_tag = model.tag.find_matching_tag(repository_ref._db_id, tag_names)
assert found_tag is None or not found_tag.hidden
return Tag.for_repository_tag(found_tag)
def get_most_recent_tag(self, repository_ref):
""" Returns the most recently pushed alive tag in the repository, if any. If none, returns
None.
"""
found_tag = model.tag.get_most_recent_tag(repository_ref._db_id)
assert found_tag is None or not found_tag.hidden
return Tag.for_repository_tag(found_tag)
def get_manifest_for_tag(self, tag, backfill_if_necessary=False, include_legacy_image=False):
""" Returns the manifest associated with the given tag. """
try:
tag_manifest = database.TagManifest.get(tag_id=tag._db_id)
except database.TagManifest.DoesNotExist:
if backfill_if_necessary:
return self.backfill_manifest_for_tag(tag)
return None
return Manifest.for_tag_manifest(tag_manifest)
def lookup_manifest_by_digest(self, repository_ref, manifest_digest, allow_dead=False,
include_legacy_image=False, require_available=False):
""" Looks up the manifest with the given digest under the given repository and returns it
or None if none. """
repo = model.repository.lookup_repository(repository_ref._db_id)
if repo is None:
return None
try:
tag_manifest = model.tag.load_manifest_by_digest(repo.namespace_user.username,
repo.name,
manifest_digest,
allow_dead=allow_dead)
except model.tag.InvalidManifestException:
return None
legacy_image = None
if include_legacy_image:
legacy_image = self.get_legacy_image(repository_ref, tag_manifest.tag.image.docker_image_id,
include_parents=True)
return Manifest.for_tag_manifest(tag_manifest, legacy_image)
def create_manifest_and_retarget_tag(self, repository_ref, manifest_interface_instance, tag_name,
storage, raise_on_error=False):
""" Creates a manifest in a repository, adding all of the necessary data in the model.
The `manifest_interface_instance` parameter must be an instance of the manifest
interface as returned by the image/docker package.
Note that all blobs referenced by the manifest must exist under the repository or this
method will fail and return None.
Returns a reference to the (created manifest, tag) or (None, None) on error.
"""
# NOTE: Only Schema1 is supported by the pre_oci_model.
assert isinstance(manifest_interface_instance, DockerSchema1Manifest)
if not manifest_interface_instance.layers:
return None, None
# Ensure all the blobs in the manifest exist.
digests = manifest_interface_instance.checksums
query = self._lookup_repo_storages_by_content_checksum(repository_ref._db_id, digests)
blob_map = {s.content_checksum: s for s in query}
for layer in manifest_interface_instance.layers:
digest_str = str(layer.digest)
if digest_str not in blob_map:
return None, None
# Lookup all the images and their parent images (if any) inside the manifest.
# This will let us know which v1 images we need to synthesize and which ones are invalid.
docker_image_ids = list(manifest_interface_instance.legacy_image_ids)
images_query = model.image.lookup_repository_images(repository_ref._db_id, docker_image_ids)
image_storage_map = {i.docker_image_id: i.storage for i in images_query}
# Rewrite any v1 image IDs that do not match the checksum in the database.
try:
rewritten_images = manifest_interface_instance.rewrite_invalid_image_ids(image_storage_map)
rewritten_images = list(rewritten_images)
parent_image_map = {}
for rewritten_image in rewritten_images:
if rewritten_image.image_id not in image_storage_map:
parent_image = None
if rewritten_image.parent_image_id:
parent_image = parent_image_map.get(rewritten_image.parent_image_id)
if parent_image is None:
parent_image = model.image.get_image(repository_ref._db_id,
rewritten_image.parent_image_id)
if parent_image is None:
return None, None
synthesized = model.image.synthesize_v1_image(
repository_ref._db_id,
blob_map[rewritten_image.content_checksum].id,
blob_map[rewritten_image.content_checksum].image_size,
rewritten_image.image_id,
rewritten_image.created,
rewritten_image.comment,
rewritten_image.command,
rewritten_image.compat_json,
parent_image,
)
parent_image_map[rewritten_image.image_id] = synthesized
except ManifestException:
logger.exception("exception when rewriting v1 metadata")
return None, None
# Store the manifest pointing to the tag.
leaf_layer_id = rewritten_images[-1].image_id
tag_manifest, newly_created = model.tag.store_tag_manifest_for_repo(repository_ref._db_id,
tag_name,
manifest_interface_instance,
leaf_layer_id,
blob_map)
manifest = Manifest.for_tag_manifest(tag_manifest)
# Save the labels on the manifest.
repo_tag = tag_manifest.tag
if newly_created:
has_labels = False
with self.batch_create_manifest_labels(manifest) as add_label:
if add_label is None:
return None, None
for key, value in manifest_interface_instance.layers[-1].v1_metadata.labels.iteritems():
media_type = 'application/json' if is_json(value) else 'text/plain'
add_label(key, value, 'manifest', media_type)
has_labels = True
# Reload the tag in case any updates were applied.
if has_labels:
repo_tag = database.RepositoryTag.get(id=repo_tag.id)
return manifest, Tag.for_repository_tag(repo_tag)
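# Sketch of the v1 rewrite step above, with hypothetical values: a manifest
# layer claiming image ID 'abc' whose checksum does not match the stored image
# gets a rewritten ID from rewrite_invalid_image_ids, and synthesize_v1_image
# then materializes an Image row for it bound to the blob with the matching
# content checksum.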
def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None):
""" Creates a label on the manifest with the given key and value. """
try:
tag_manifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
return None
label_data = dict(key=key, value=value, source_type_name=source_type_name,
media_type_name=media_type_name)
with db_transaction():
# Create the label itself.
label = model.label.create_manifest_label(tag_manifest, key, value, source_type_name,
media_type_name)
# Apply any changes to the manifest that the label prescribes.
apply_label_to_manifest(label_data, manifest, self)
return Label.for_label(label)
@contextmanager
def batch_create_manifest_labels(self, manifest):
""" Returns a context manager for batch creation of labels on a manifest.
Can raise InvalidLabelKeyException or InvalidMediaTypeException depending
on the validation errors.
"""
try:
tag_manifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
yield None
return
labels_to_add = []
def add_label(key, value, source_type_name, media_type_name=None):
labels_to_add.append(dict(key=key, value=value, source_type_name=source_type_name,
media_type_name=media_type_name))
yield add_label
# TODO: make this truly batch once we've fully transitioned to V2_2 and no longer need
# the mapping tables.
for label in labels_to_add:
with db_transaction():
# Create the label itself.
model.label.create_manifest_label(tag_manifest, **label)
# Apply any changes to the manifest that the label prescribes.
apply_label_to_manifest(label, manifest, self)
def list_manifest_labels(self, manifest, key_prefix=None):
""" Returns all labels found on the manifest. If specified, the key_prefix will filter the
labels returned to those keys that start with the given prefix.
"""
labels = model.label.list_manifest_labels(manifest._db_id, prefix_filter=key_prefix)
return [Label.for_label(l) for l in labels]
def get_manifest_label(self, manifest, label_uuid):
""" Returns the label with the specified UUID on the manifest or None if none. """
return Label.for_label(model.label.get_manifest_label(label_uuid, manifest._db_id))
def delete_manifest_label(self, manifest, label_uuid):
""" Delete the label with the specified UUID on the manifest. Returns the label deleted
or None if none.
"""
return Label.for_label(model.label.delete_manifest_label(label_uuid, manifest._db_id))
def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit):
"""
Returns a page of active tags in a repository. Note that the tags returned by this method
are ShallowTag objects, which only contain the tag name.
"""
tags = model.tag.list_active_repo_tags(repository_ref._db_id, include_images=False,
start_id=start_pagination_id, limit=limit)
return [ShallowTag.for_repository_tag(tag) for tag in tags]
def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False):
"""
Returns a list of all the active tags in the repository. Note that this is a *HEAVY*
operation on repositories with a lot of tags, and should only be used for testing or
where other more specific operations are not possible.
"""
if not include_legacy_images:
tags = model.tag.list_active_repo_tags(repository_ref._db_id, include_images=False)
return [Tag.for_repository_tag(tag) for tag in tags]
tags = model.tag.list_active_repo_tags(repository_ref._db_id)
return [Tag.for_repository_tag(tag,
legacy_image=LegacyImage.for_image(tag.image),
manifest_digest=(tag.tagmanifest.digest
if hasattr(tag, 'tagmanifest')
else None))
for tag in tags]
def list_repository_tag_history(self, repository_ref, page=1, size=100, specific_tag_name=None,
active_tags_only=False, since_time_ms=None):
"""
Returns the history of all tags in the repository (unless filtered). This includes tags that
have been made inactive due to newer versions of those tags coming into service.
"""
# Only available on OCI model
if since_time_ms is not None:
raise NotImplementedError
tags, manifest_map, has_more = model.tag.list_repository_tag_history(repository_ref._db_id,
page, size,
specific_tag_name,
active_tags_only)
return [Tag.for_repository_tag(tag, manifest_map.get(tag.id),
legacy_image=LegacyImage.for_image(tag.image))
for tag in tags], has_more
def has_expired_tag(self, repository_ref, tag_name):
"""
Returns true if and only if the repository contains a tag with the given name that is expired.
"""
try:
model.tag.get_expired_tag_in_repo(repository_ref._db_id, tag_name)
return True
except database.RepositoryTag.DoesNotExist:
return False
def get_most_recent_tag_lifetime_start(self, repository_refs):
"""
Returns a map from repository ID to the last modified time (in s) for each repository in the
given repository reference list.
"""
if not repository_refs:
return {}
tuples = (database.RepositoryTag.select(database.RepositoryTag.repository,
fn.Max(database.RepositoryTag.lifetime_start_ts))
.where(database.RepositoryTag.repository << [r.id for r in repository_refs])
.group_by(database.RepositoryTag.repository)
.tuples())
return {repo_id: seconds for repo_id, seconds in tuples}
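# The peewee query above is roughly equivalent to:
#
#   SELECT repository_id, MAX(lifetime_start_ts)
#   FROM repositorytag
#   WHERE repository_id IN (...)
#   GROUP BY repository_id;
#
# Note that lifetime_start_ts is already in seconds here, unlike the
# millisecond values handled by the OCI implementation.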
def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False):
"""
Returns the latest, *active* tag found in the repository, with the matching name
or None if none.
"""
assert isinstance(tag_name, basestring)
tag = model.tag.get_active_tag_for_repo(repository_ref._db_id, tag_name)
if tag is None:
return None
legacy_image = LegacyImage.for_image(tag.image) if include_legacy_image else None
tag_manifest = model.tag.get_tag_manifest(tag)
manifest_digest = tag_manifest.digest if tag_manifest else None
return Tag.for_repository_tag(tag, legacy_image=legacy_image, manifest_digest=manifest_digest)
def retarget_tag(self, repository_ref, tag_name, manifest_or_legacy_image, storage,
legacy_manifest_key, is_reversion=False):
"""
Creates, updates or moves a tag to a new entry in history, pointing to the manifest or
legacy image specified. If is_reversion is set to True, this operation is considered a
reversion over a previous tag move operation. Returns the updated Tag or None on error.
"""
# TODO: unify this.
assert legacy_manifest_key is not None
if not is_reversion:
if isinstance(manifest_or_legacy_image, Manifest):
raise NotImplementedError('Not yet implemented')
else:
model.tag.create_or_update_tag_for_repo(repository_ref._db_id, tag_name,
manifest_or_legacy_image.docker_image_id)
else:
if isinstance(manifest_or_legacy_image, Manifest):
model.tag.restore_tag_to_manifest(repository_ref._db_id, tag_name,
manifest_or_legacy_image.digest)
else:
model.tag.restore_tag_to_image(repository_ref._db_id, tag_name,
manifest_or_legacy_image.docker_image_id)
# Generate a manifest for the tag, if necessary.
tag = self.get_repo_tag(repository_ref, tag_name, include_legacy_image=True)
if tag is None:
return None
self.backfill_manifest_for_tag(tag)
return tag
def delete_tag(self, repository_ref, tag_name):
"""
Deletes the latest, *active* tag with the given name in the repository.
"""
repo = model.repository.lookup_repository(repository_ref._db_id)
if repo is None:
return None
deleted_tag = model.tag.delete_tag(repo.namespace_user.username, repo.name, tag_name)
return Tag.for_repository_tag(deleted_tag)
def delete_tags_for_manifest(self, manifest):
"""
Deletes all tags pointing to the given manifest, making the manifest inaccessible for pulling.
Returns the tags deleted, if any. Returns None on error.
"""
try:
tagmanifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
return None
namespace_name = tagmanifest.tag.repository.namespace_user.username
repo_name = tagmanifest.tag.repository.name
tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, manifest.digest)
return [Tag.for_repository_tag(tag) for tag in tags]
def change_repository_tag_expiration(self, tag, expiration_date):
""" Sets the expiration date of the tag under the matching repository to that given. If the
expiration date is None, then the tag will not expire. Returns a tuple of the previous
expiration timestamp in seconds (if any), and whether the operation succeeded.
"""
try:
tag_obj = database.RepositoryTag.get(id=tag._db_id)
except database.RepositoryTag.DoesNotExist:
return (None, False)
return model.tag.change_tag_expiration(tag_obj, expiration_date)
def get_legacy_images_owned_by_tag(self, tag):
""" Returns all legacy images *solely owned and used* by the given tag. """
try:
tag_obj = database.RepositoryTag.get(id=tag._db_id)
except database.RepositoryTag.DoesNotExist:
return None
# Collect the IDs of all images that the tag uses.
tag_image_ids = set()
tag_image_ids.add(tag_obj.image.id)
tag_image_ids.update(tag_obj.image.ancestor_id_list())
# Remove any images shared by other tags.
for current_tag in model.tag.list_active_repo_tags(tag_obj.repository_id):
if current_tag == tag_obj:
continue
tag_image_ids.discard(current_tag.image.id)
tag_image_ids = tag_image_ids.difference(current_tag.image.ancestor_id_list())
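# As in the OCI model, an empty set means every remaining image is shared with
# another tag, so the computation can stop early (the same check also runs
# after the loop).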
if not tag_image_ids:
return []
if not tag_image_ids:
return []
# Load the images we need to return.
images = database.Image.select().where(database.Image.id << list(tag_image_ids))
all_image_ids = set()
for image in images:
all_image_ids.add(image.id)
all_image_ids.update(image.ancestor_id_list())
# Build a map of all the images and their parents.
images_map = {}
all_images = database.Image.select().where(database.Image.id << list(all_image_ids))
for image in all_images:
images_map[image.id] = image
return [LegacyImage.for_image(image, images_map=images_map) for image in images]
def get_security_status(self, manifest_or_legacy_image):
""" Returns the security status for the given manifest or legacy image or None if none. """
image = None
if isinstance(manifest_or_legacy_image, Manifest):
try:
tag_manifest = database.TagManifest.get(id=manifest_or_legacy_image._db_id)
image = tag_manifest.tag.image
except database.TagManifest.DoesNotExist:
return None
else:
try:
image = database.Image.get(id=manifest_or_legacy_image._db_id)
except database.Image.DoesNotExist:
return None
if image.security_indexed_engine is not None and image.security_indexed_engine >= 0:
return SecurityScanStatus.SCANNED if image.security_indexed else SecurityScanStatus.FAILED
return SecurityScanStatus.QUEUED
def reset_security_status(self, manifest_or_legacy_image):
""" Resets the security status for the given manifest or legacy image, ensuring that it will
get re-indexed.
"""
image = None
if isinstance(manifest_or_legacy_image, Manifest):
try:
tag_manifest = database.TagManifest.get(id=manifest_or_legacy_image._db_id)
image = tag_manifest.tag.image
except database.TagManifest.DoesNotExist:
return None
else:
try:
image = database.Image.get(id=manifest_or_legacy_image._db_id)
except database.Image.DoesNotExist:
return None
assert image
image.security_indexed = False
image.security_indexed_engine = IMAGE_NOT_SCANNED_ENGINE_VERSION
image.save()
def backfill_manifest_for_tag(self, tag):
""" Backfills a manifest for the V1 tag specified.
If a manifest already exists for the tag, returns that manifest.
NOTE: This method will only be necessary until we've completed the backfill, at which point
it should be removed.
"""
# Ensure that there isn't already a manifest for the tag.
tag_manifest = model.tag.get_tag_manifest(tag._db_id)
if tag_manifest is not None:
return Manifest.for_tag_manifest(tag_manifest)
# Create the manifest.
try:
tag_obj = database.RepositoryTag.get(id=tag._db_id)
except database.RepositoryTag.DoesNotExist:
return None
assert not tag_obj.hidden
repo = tag_obj.repository
# Write the manifest to the DB.
manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image)
if manifest is None:
return None
blob_query = self._lookup_repo_storages_by_content_checksum(repo, manifest.checksums)
storage_map = {blob.content_checksum: blob.id for blob in blob_query}
try:
tag_manifest = model.tag.associate_generated_tag_manifest_with_tag(tag_obj, manifest,
storage_map)
assert tag_manifest
except IntegrityError:
tag_manifest = model.tag.get_tag_manifest(tag_obj)
return Manifest.for_tag_manifest(tag_manifest)
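# The IntegrityError branch handles a backfill race: if another worker
# associated a manifest with this tag first, the insert fails and the existing
# manifest is simply re-read instead.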
def list_manifest_layers(self, manifest, storage, include_placements=False):
try:
tag_manifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
logger.exception('Could not find tag manifest for manifest `%s`', manifest._db_id)
return None
try:
parsed = manifest.get_parsed_manifest()
except ManifestException:
logger.exception('Could not parse and validate manifest `%s`', manifest._db_id)
return None
repo_ref = RepositoryReference.for_id(tag_manifest.tag.repository_id)
return self.list_parsed_manifest_layers(repo_ref, parsed, storage, include_placements)
def lookup_derived_image(self, manifest, verb, storage, varying_metadata=None,
include_placements=False):
"""
Looks up the derived image for the given manifest, verb and optional varying metadata and
returns it or None if none.
"""
try:
tag_manifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
logger.exception('Could not find tag manifest for manifest `%s`', manifest._db_id)
return None
repo_image = tag_manifest.tag.image
derived = model.image.find_derived_storage_for_image(repo_image, verb, varying_metadata)
return self._build_derived(derived, verb, varying_metadata, include_placements)
def lookup_or_create_derived_image(self, manifest, verb, storage_location, storage,
varying_metadata=None, include_placements=False):
"""
Looks up the derived image for the given manifest, verb and optional varying metadata
and returns it. If none exists, a new derived image is created.
"""
try:
tag_manifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
logger.exception('Could not find tag manifest for manifest `%s`', manifest._db_id)
return None
repo_image = tag_manifest.tag.image
derived = model.image.find_or_create_derived_storage(repo_image, verb, storage_location,
varying_metadata)
return self._build_derived(derived, verb, varying_metadata, include_placements)
def set_tags_expiration_for_manifest(self, manifest, expiration_sec):
"""
Sets the expiration on all tags that point to the given manifest to that specified.
"""
try:
tag_manifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
return
model.tag.set_tag_expiration_for_manifest(tag_manifest, expiration_sec)
def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage):
""" Returns the schema 1 version of this manifest, or None if none. """
try:
return manifest.get_parsed_manifest()
except ManifestException:
return None
def convert_manifest(self, manifest, namespace_name, repo_name, tag_name, allowed_mediatypes,
storage):
try:
parsed = manifest.get_parsed_manifest()
except ManifestException:
return None
try:
return parsed.convert_manifest(allowed_mediatypes, namespace_name, repo_name, tag_name, None)
except ManifestException:
return None
def create_manifest_with_temp_tag(self, repository_ref, manifest_interface_instance,
expiration_sec, storage):
""" Creates a manifest under the repository and sets a temporary tag to point to it.
Returns the manifest object created or None on error.
"""
raise NotImplementedError('Unsupported in pre OCI model')
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
"""
Returns the blob in the repository with the given digest, if any, or None if none. Note that
there may be multiple records in the same repository for the same blob digest, so the return
value of this function may change.
"""
image_storage = self._get_shared_storage(blob_digest)
if image_storage is None:
try:
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
except model.BlobDoesNotExist:
return None
assert image_storage.cas_path is not None
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(image_storage.uuid))
return Blob.for_image_storage(image_storage,
storage_path=model.storage.get_layer_path(image_storage),
placements=placements)
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, storage,
include_placements=False):
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
and working towards the leaf, including the associated Blob and its placements
(if specified).
"""
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, storage,
include_placements=include_placements)
def get_manifest_local_blobs(self, manifest, include_placements=False):
""" Returns the set of local blobs for the given manifest or None if none. """
try:
tag_manifest = database.TagManifest.get(id=manifest._db_id)
except database.TagManifest.DoesNotExist:
return None
return self._get_manifest_local_blobs(manifest, tag_manifest.tag.repository_id,
include_placements)
def yield_tags_for_vulnerability_notification(self, layer_id_pairs):
""" Yields tags that contain one (or more) of the given layer ID pairs, in repositories
which have been registered for vulnerability_found notifications. Returns an iterator
of LikelyVulnerableTag instances.
"""
event = database.ExternalNotificationEvent.get(name='vulnerability_found')
def filter_notifying_repos(query):
return model.tag.filter_has_repository_event(query, event)
def filter_and_order(query):
return model.tag.filter_tags_have_repository_event(query, event)
# Find the matching tags.
tags = model.tag.get_matching_tags_for_images(layer_id_pairs,
selections=[database.RepositoryTag,
database.Image,
database.ImageStorage],
filter_images=filter_notifying_repos,
filter_tags=filter_and_order)
for tag in tags:
yield LikelyVulnerableTag.for_repository_tag(tag, tag.repository)
pre_oci_model = PreOCIModel()

View file

@@ -0,0 +1,509 @@
# pylint: disable=protected-access
import logging
from abc import abstractmethod
from collections import defaultdict
from data import database
from data import model
from data.cache import cache_key
from data.model.oci.retriever import RepositoryContentRetriever
from data.model.blob import get_shared_blob
from data.registry_model.datatype import FromDictionaryException
from data.registry_model.datatypes import (RepositoryReference, Blob, TorrentInfo, BlobUpload,
LegacyImage, ManifestLayer, DerivedImage, ShallowTag)
from image.docker.schema1 import ManifestException, DockerSchema1ManifestBuilder
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST
logger = logging.getLogger(__name__)
# The maximum size for generated manifest after which we remove extra metadata.
MAXIMUM_GENERATED_MANIFEST_SIZE = 3 * 1024 * 1024 # 3 MB
class SharedModel:
"""
SharedModel implements those data model operations for the registry API that are unchanged
between the old and new data models.
"""
def lookup_repository(self, namespace_name, repo_name, kind_filter=None):
""" Looks up and returns a reference to the repository with the given namespace and name,
or None if none. """
repo = model.repository.get_repository(namespace_name, repo_name, kind_filter=kind_filter)
state = repo.state if repo is not None else None
return RepositoryReference.for_repo_obj(repo, namespace_name, repo_name,
repo.namespace_user.stripe_id is None if repo else None,
state=state)
def is_existing_disabled_namespace(self, namespace_name):
""" Returns whether the given namespace exists and is disabled. """
namespace = model.user.get_namespace_user(namespace_name)
return namespace is not None and not namespace.enabled
def is_namespace_enabled(self, namespace_name):
""" Returns whether the given namespace exists and is enabled. """
namespace = model.user.get_namespace_user(namespace_name)
return namespace is not None and namespace.enabled
def get_derived_image_signature(self, derived_image, signer_name):
"""
Returns the signature associated with the derived image and a specific signer or None if none.
"""
try:
derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
except database.DerivedStorageForImage.DoesNotExist:
return None
storage = derived_storage.derivative
signature_entry = model.storage.lookup_storage_signature(storage, signer_name)
if signature_entry is None:
return None
return signature_entry.signature
def set_derived_image_signature(self, derived_image, signer_name, signature):
"""
Sets the calculated signature for the given derived image and signer to that specified.
"""
try:
derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
except database.DerivedStorageForImage.DoesNotExist:
return None
storage = derived_storage.derivative
signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name)
signature_entry.signature = signature
signature_entry.uploading = False
signature_entry.save()
def delete_derived_image(self, derived_image):
"""
Deletes a derived image and all of its storage.
"""
try:
derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
except database.DerivedStorageForImage.DoesNotExist:
return None
model.image.delete_derived_storage(derived_storage)
def set_derived_image_size(self, derived_image, compressed_size):
"""
Sets the compressed size on the given derived image.
"""
try:
derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
except database.DerivedStorageForImage.DoesNotExist:
return None
storage_entry = derived_storage.derivative
storage_entry.image_size = compressed_size
storage_entry.uploading = False
storage_entry.save()
def get_torrent_info(self, blob):
"""
Returns the torrent information associated with the given blob or None if none.
"""
try:
image_storage = database.ImageStorage.get(id=blob._db_id)
except database.ImageStorage.DoesNotExist:
return None
try:
torrent_info = model.storage.get_torrent_info(image_storage)
except model.TorrentInfoDoesNotExist:
return None
return TorrentInfo.for_torrent_info(torrent_info)
def set_torrent_info(self, blob, piece_length, pieces):
"""
Sets the torrent information associated with the given blob to that specified.
"""
try:
image_storage = database.ImageStorage.get(id=blob._db_id)
except database.ImageStorage.DoesNotExist:
return None
torrent_info = model.storage.save_torrent_info(image_storage, piece_length, pieces)
return TorrentInfo.for_torrent_info(torrent_info)
@abstractmethod
def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit):
pass
def lookup_cached_active_repository_tags(self, model_cache, repository_ref, start_pagination_id,
limit):
"""
Returns a page of active tags in a repository. Note that the tags returned by this method
are ShallowTag objects, which only contain the tag name. This method will automatically cache
the result and check the cache before making a call.
"""
def load_tags():
tags = self.lookup_active_repository_tags(repository_ref, start_pagination_id, limit)
return [tag.asdict() for tag in tags]
tags_cache_key = cache_key.for_active_repo_tags(repository_ref._db_id, start_pagination_id,
limit)
result = model_cache.retrieve(tags_cache_key, load_tags)
try:
return [ShallowTag.from_dict(tag_dict) for tag_dict in result]
except FromDictionaryException:
return self.lookup_active_repository_tags(repository_ref, start_pagination_id, limit)
def get_cached_namespace_region_blacklist(self, model_cache, namespace_name):
""" Returns a cached set of ISO country codes blacklisted for pulls for the namespace
or None if the list could not be loaded.
"""
def load_blacklist():
restrictions = model.user.list_namespace_geo_restrictions(namespace_name)
if restrictions is None:
return None
return [restriction.restricted_region_iso_code for restriction in restrictions]
blacklist_cache_key = cache_key.for_namespace_geo_restrictions(namespace_name)
result = model_cache.retrieve(blacklist_cache_key, load_blacklist)
if result is None:
return None
return set(result)
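
  # Sketch of how a caller might enforce the blacklist (hypothetical; the
  # request's resolved country code would come from a GeoIP lookup elsewhere):
  #
  #   blacklist = self.get_cached_namespace_region_blacklist(model_cache, namespace_name)
  #   if blacklist is not None and request_country_code in blacklist:
  #     abort(403)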
def get_cached_repo_blob(self, model_cache, namespace_name, repo_name, blob_digest):
"""
Returns the blob in the repository with the given digest if any or None if none.
Caches the result in the caching system.
"""
def load_blob():
repository_ref = self.lookup_repository(namespace_name, repo_name)
if repository_ref is None:
return None
blob_found = self.get_repo_blob_by_digest(repository_ref, blob_digest,
include_placements=True)
if blob_found is None:
return None
return blob_found.asdict()
blob_cache_key = cache_key.for_repository_blob(namespace_name, repo_name, blob_digest, 2)
blob_dict = model_cache.retrieve(blob_cache_key, load_blob)
try:
return Blob.from_dict(blob_dict) if blob_dict is not None else None
except FromDictionaryException:
# The data was stale in some way. Simply reload.
repository_ref = self.lookup_repository(namespace_name, repo_name)
if repository_ref is None:
return None
return self.get_repo_blob_by_digest(repository_ref, blob_digest, include_placements=True)
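
  # Example lookup during a blob pull (illustrative; the digest value is made up).
  # A FromDictionaryException above means the cached dict predates the current
  # Blob schema, so the method transparently falls back to a direct load:
  #
  #   blob = self.get_cached_repo_blob(model_cache, 'devtable', 'simple',
  #                                    'sha256:abcd...')
  #   if blob is None:
  #     raise BlobDoesNotExist()  # hypothetical error type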
@abstractmethod
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
pass
def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata):
""" Creates a new blob upload and returns a reference. If the blob upload could not be
created, returns None. """
repo = model.repository.lookup_repository(repository_ref._db_id)
if repo is None:
return None
try:
upload_record = model.blob.initiate_upload_for_repo(repo, new_upload_id, location_name,
storage_metadata)
return BlobUpload.for_upload(upload_record, location_name=location_name)
except database.Repository.DoesNotExist:
return None
def lookup_blob_upload(self, repository_ref, blob_upload_id):
""" Looks up the blob upload withn the given ID under the specified repository and returns it
or None if none.
"""
upload_record = model.blob.get_blob_upload_by_uuid(blob_upload_id)
if upload_record is None:
return None
return BlobUpload.for_upload(upload_record)
def update_blob_upload(self, blob_upload, uncompressed_byte_count, piece_hashes, piece_sha_state,
storage_metadata, byte_count, chunk_count, sha_state):
""" Updates the fields of the blob upload to match those given. Returns the updated blob upload
    or None if the record does not exist.
"""
upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
if upload_record is None:
return None
upload_record.uncompressed_byte_count = uncompressed_byte_count
upload_record.piece_hashes = piece_hashes
upload_record.piece_sha_state = piece_sha_state
upload_record.storage_metadata = storage_metadata
upload_record.byte_count = byte_count
upload_record.chunk_count = chunk_count
upload_record.sha_state = sha_state
upload_record.save()
return BlobUpload.for_upload(upload_record)
def delete_blob_upload(self, blob_upload):
""" Deletes a blob upload record. """
upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
if upload_record is not None:
upload_record.delete_instance()
def commit_blob_upload(self, blob_upload, blob_digest_str, blob_expiration_seconds):
""" Commits the blob upload into a blob and sets an expiration before that blob will be GCed.
"""
upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
if upload_record is None:
return None
repository_id = upload_record.repository_id
# Create the blob and temporarily tag it.
location_obj = model.storage.get_image_location_for_name(blob_upload.location_name)
blob_record = model.blob.store_blob_record_and_temp_link_in_repo(
repository_id, blob_digest_str, location_obj.id, blob_upload.byte_count,
blob_expiration_seconds, blob_upload.uncompressed_byte_count)
# Delete the blob upload.
upload_record.delete_instance()
return Blob.for_image_storage(blob_record,
storage_path=model.storage.get_layer_path(blob_record))
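
  # Illustrative commit flow (a sketch; assumes `blob_upload` came from
  # lookup_blob_upload and `digest` was computed over the uploaded bytes):
  #
  #   blob = self.commit_blob_upload(blob_upload, 'sha256:' + digest,
  #                                  blob_expiration_seconds=3600)
  #   # The temporary link created above keeps the blob alive until a manifest
  #   # references it or the expiration elapses.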
def mount_blob_into_repository(self, blob, target_repository_ref, expiration_sec):
"""
Mounts the blob from another repository into the specified target repository, and adds an
    expiration before that blob is automatically GCed. This is useful during push operations
    when the client indicates that the blob already exists in another repository. Returns
    False if the mounting fails.
"""
storage = model.blob.temp_link_blob(target_repository_ref._db_id, blob.digest, expiration_sec)
return bool(storage)
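
  # Cross-repository mount sketch (illustrative; this mirrors the registry
  # protocol's `mount=<digest>&from=<repo>` push optimization):
  #
  #   existing = self.get_repo_blob_by_digest(source_repository_ref, digest)
  #   if existing is not None and self.mount_blob_into_repository(
  #       existing, target_repository_ref, expiration_sec=3600):
  #     return  # the client can skip uploading the blob entirely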
def get_legacy_images(self, repository_ref):
"""
    Returns a list of all the LegacyImages defined in the matching repository.
"""
repo = model.repository.lookup_repository(repository_ref._db_id)
if repo is None:
return None
all_images = model.image.get_repository_images_without_placements(repo)
all_images_map = {image.id: image for image in all_images}
all_tags = model.tag.list_repository_tags(repo.namespace_user.username, repo.name)
tags_by_image_id = defaultdict(list)
for tag in all_tags:
tags_by_image_id[tag.image_id].append(tag)
return [LegacyImage.for_image(image, images_map=all_images_map, tags_map=tags_by_image_id)
for image in all_images]
def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False,
include_blob=False):
"""
    Returns the matching LegacyImage under the repository, if any. If none,
    returns None.
"""
repo = model.repository.lookup_repository(repository_ref._db_id)
if repo is None:
return None
image = model.image.get_image(repository_ref._db_id, docker_image_id)
if image is None:
return None
parent_images_map = None
if include_parents:
parent_images = model.image.get_parent_images(repo.namespace_user.username, repo.name, image)
parent_images_map = {image.id: image for image in parent_images}
blob = None
if include_blob:
placements = list(model.storage.get_storage_locations(image.storage.uuid))
blob = Blob.for_image_storage(image.storage,
storage_path=model.storage.get_layer_path(image.storage),
placements=placements)
return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob)
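
  # Example (illustrative; the `blob` attribute name on LegacyImage is assumed
  # from the constructor call above):
  #
  #   image = self.get_legacy_image(repository_ref, docker_image_id,
  #                                 include_parents=True, include_blob=True)
  #   if image is not None and image.blob is not None:
  #     path = image.blob.storage_path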
def _get_manifest_local_blobs(self, manifest, repo_id, include_placements=False,
by_manifest=False):
parsed = manifest.get_parsed_manifest()
if parsed is None:
return None
local_blob_digests = list(set(parsed.local_blob_digests))
    if not local_blob_digests:
      return []
blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, local_blob_digests,
by_manifest=by_manifest)
blobs = []
for image_storage in blob_query:
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(image_storage.uuid))
blob = Blob.for_image_storage(image_storage,
storage_path=model.storage.get_layer_path(image_storage),
placements=placements)
blobs.append(blob)
return blobs
def _list_manifest_layers(self, repo_id, parsed, storage, include_placements=False,
by_manifest=False):
""" Returns an *ordered list* of the layers found in the manifest, starting at the base and
working towards the leaf, including the associated Blob and its placements (if specified).
Returns None if the manifest could not be parsed and validated.
"""
assert not parsed.is_manifest_list
retriever = RepositoryContentRetriever(repo_id, storage)
requires_empty_blob = parsed.get_requires_empty_layer_blob(retriever)
storage_map = {}
blob_digests = list(parsed.local_blob_digests)
if requires_empty_blob:
blob_digests.append(EMPTY_LAYER_BLOB_DIGEST)
if blob_digests:
blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, blob_digests,
by_manifest=by_manifest)
storage_map = {blob.content_checksum: blob for blob in blob_query}
layers = parsed.get_layers(retriever)
if layers is None:
logger.error('Could not load layers for manifest `%s`', parsed.digest)
return None
manifest_layers = []
for layer in layers:
if layer.is_remote:
manifest_layers.append(ManifestLayer(layer, None))
continue
digest_str = str(layer.blob_digest)
if digest_str not in storage_map:
logger.error('Missing digest `%s` for manifest `%s`', layer.blob_digest, parsed.digest)
return None
image_storage = storage_map[digest_str]
assert image_storage.cas_path is not None
assert image_storage.image_size is not None
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(image_storage.uuid))
blob = Blob.for_image_storage(image_storage,
storage_path=model.storage.get_layer_path(image_storage),
placements=placements)
manifest_layers.append(ManifestLayer(layer, blob))
return manifest_layers
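
  # Illustrative consumer (the `blob` field name on ManifestLayer is assumed
  # from the constructor calls above; remote layers carry None for it):
  #
  #   total_size = 0
  #   for manifest_layer in self._list_manifest_layers(repo_id, parsed, storage) or []:
  #     if manifest_layer.blob is not None:
  #       total_size += manifest_layer.blob.compressed_size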
def _build_derived(self, derived, verb, varying_metadata, include_placements):
if derived is None:
return None
derived_storage = derived.derivative
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(derived_storage.uuid))
blob = Blob.for_image_storage(derived_storage,
storage_path=model.storage.get_layer_path(derived_storage),
placements=placements)
return DerivedImage.for_derived_storage(derived, verb, varying_metadata, blob)
def _build_manifest_for_legacy_image(self, tag_name, legacy_image_row):
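    # NOTE: imported locally, presumably to avoid a circular import at module load time.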
import features
from app import app, docker_v2_signing_key
repo = legacy_image_row.repository
namespace_name = repo.namespace_user.username
repo_name = repo.name
# Find the v1 metadata for this image and its parents.
try:
parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row)
except model.DataModelException:
logger.exception('Could not load parent images for legacy image %s', legacy_image_row.id)
return None
# If the manifest is being generated under the library namespace, then we make its namespace
# empty.
manifest_namespace = namespace_name
if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']:
manifest_namespace = ''
# Create and populate the manifest builder
builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name)
# Add the leaf layer
builder.add_layer(legacy_image_row.storage.content_checksum, legacy_image_row.v1_json_metadata)
if legacy_image_row.storage.uploading:
logger.error('Cannot add an uploading storage row: %s', legacy_image_row.storage.id)
return None
for parent_image in parents:
if parent_image.storage.uploading:
        logger.error('Cannot add an uploading storage row: %s', parent_image.storage.id)
return None
builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata)
try:
built_manifest = builder.build(docker_v2_signing_key)
# If the generated manifest is greater than the maximum size, regenerate it with
# intermediate metadata layers stripped down to their bare essentials.
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
built_manifest = builder.with_metadata_removed().build(docker_v2_signing_key)
if len(built_manifest.bytes.as_encoded_str()) > MAXIMUM_GENERATED_MANIFEST_SIZE:
logger.error('Legacy image is too large to generate manifest')
return None
return built_manifest
    except ManifestException:
logger.exception('Got exception when trying to build manifest for legacy image %s',
legacy_image_row)
return None
def _get_shared_storage(self, blob_digest):
""" Returns an ImageStorage row for the blob digest if it is a globally shared storage. """
    # If the digest is the well-known EMPTY_LAYER_BLOB_DIGEST, look it up directly. Since we
    # currently have so many duplicate copies of it in the database, looking it up bound to a
    # repository can be incredibly slow, and, since it is defined as a globally shared layer,
    # that is extra work we don't need to do.
if blob_digest == EMPTY_LAYER_BLOB_DIGEST:
return get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
return None
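
  # Example (illustrative): the well-known empty gzipped layer resolves without a
  # repository-bound lookup, while any other digest falls through to None:
  #
  #   assert self._get_shared_storage(EMPTY_LAYER_BLOB_DIGEST) is not None
  #   assert self._get_shared_storage('sha256:anything-else') is None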
def _lookup_repo_storages_by_content_checksum(self, repo, checksums, by_manifest=False):
checksums = set(checksums)
# Load any shared storages first.
extra_storages = []
for checksum in list(checksums):
shared_storage = self._get_shared_storage(checksum)
if shared_storage is not None:
extra_storages.append(shared_storage)
checksums.remove(checksum)
found = []
if checksums:
found = list(model.storage.lookup_repo_storages_by_content_checksum(repo, checksums,
by_manifest=by_manifest))
return found + extra_storages

View file

@@ -0,0 +1,145 @@
import hashlib
import os
import tarfile
from io import BytesIO
from contextlib import closing
import pytest
from data.registry_model.blobuploader import (retrieve_blob_upload_manager,
upload_blob, BlobUploadException,
BlobDigestMismatchException, BlobTooLargeException,
BlobUploadSettings)
from data.registry_model.registry_pre_oci_model import PreOCIModel
from storage.distributedstorage import DistributedStorage
from storage.fakestorage import FakeStorage
from test.fixtures import *
@pytest.fixture()
def pre_oci_model(initialized_db):
return PreOCIModel()
@pytest.mark.parametrize('chunk_count', [
0,
1,
2,
10,
])
@pytest.mark.parametrize('subchunk', [
True,
False,
])
def test_basic_upload_blob(chunk_count, subchunk, pre_oci_model):
repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
settings = BlobUploadSettings('2M', 512 * 1024, 3600)
app_config = {'TESTING': True}
data = ''
with upload_blob(repository_ref, storage, settings) as manager:
assert manager
assert manager.blob_upload_id
for index in range(0, chunk_count):
chunk_data = os.urandom(100)
data += chunk_data
if subchunk:
manager.upload_chunk(app_config, BytesIO(chunk_data))
manager.upload_chunk(app_config, BytesIO(chunk_data), (index * 100) + 50)
else:
manager.upload_chunk(app_config, BytesIO(chunk_data))
blob = manager.commit_to_blob(app_config)
# Check the blob.
assert blob.compressed_size == len(data)
assert not blob.uploading
assert blob.digest == 'sha256:' + hashlib.sha256(data).hexdigest()
# Ensure the blob exists in storage and has the expected data.
assert storage.get_content(['local_us'], blob.storage_path) == data
def test_cancel_upload(pre_oci_model):
repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
settings = BlobUploadSettings('2M', 512 * 1024, 3600)
app_config = {'TESTING': True}
blob_upload_id = None
with upload_blob(repository_ref, storage, settings) as manager:
blob_upload_id = manager.blob_upload_id
assert pre_oci_model.lookup_blob_upload(repository_ref, blob_upload_id) is not None
manager.upload_chunk(app_config, BytesIO('hello world'))
  # Since the blob was not committed, the upload should be deleted.
assert blob_upload_id
assert pre_oci_model.lookup_blob_upload(repository_ref, blob_upload_id) is None
def test_too_large(pre_oci_model):
repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
settings = BlobUploadSettings('1K', 512 * 1024, 3600)
app_config = {'TESTING': True}
with upload_blob(repository_ref, storage, settings) as manager:
with pytest.raises(BlobTooLargeException):
manager.upload_chunk(app_config, BytesIO(os.urandom(1024 * 1024 * 2)))
def test_extra_blob_stream_handlers(pre_oci_model):
handler1_result = []
handler2_result = []
  def handler1(chunk):
    handler1_result.append(chunk)

  def handler2(chunk):
    handler2_result.append(chunk)
repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
settings = BlobUploadSettings('1K', 512 * 1024, 3600)
app_config = {'TESTING': True}
with upload_blob(repository_ref, storage, settings,
extra_blob_stream_handlers=[handler1, handler2]) as manager:
manager.upload_chunk(app_config, BytesIO('hello '))
manager.upload_chunk(app_config, BytesIO('world'))
assert ''.join(handler1_result) == 'hello world'
assert ''.join(handler2_result) == 'hello world'
def valid_tar_gz(contents):
with closing(BytesIO()) as layer_data:
with closing(tarfile.open(fileobj=layer_data, mode='w|gz')) as tar_file:
tar_file_info = tarfile.TarInfo(name='somefile')
tar_file_info.type = tarfile.REGTYPE
tar_file_info.size = len(contents)
tar_file_info.mtime = 1
tar_file.addfile(tar_file_info, BytesIO(contents))
layer_bytes = layer_data.getvalue()
return layer_bytes
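
# The helper above builds a single-entry gzipped tarball in memory, giving the
# uploader's gzip stream handler real compressed data to measure. Quick sanity
# check (illustrative):
#
#   layer_bytes = valid_tar_gz('hello world')
#   assert layer_bytes[:2] == '\x1f\x8b'  # gzip magic number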
def test_uncompressed_size(pre_oci_model):
repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
settings = BlobUploadSettings('1K', 512 * 1024, 3600)
app_config = {'TESTING': True}
with upload_blob(repository_ref, storage, settings) as manager:
manager.upload_chunk(app_config, BytesIO(valid_tar_gz('hello world')))
blob = manager.commit_to_blob(app_config)
assert blob.compressed_size is not None
assert blob.uncompressed_size is not None

File diff suppressed because it is too large

View file

@@ -0,0 +1,104 @@
import hashlib
import json
from io import BytesIO
import pytest
from mock import patch
from app import docker_v2_signing_key
from data.registry_model.blobuploader import BlobUploadSettings, upload_blob
from data.registry_model.manifestbuilder import create_manifest_builder, lookup_manifest_builder
from data.registry_model.registry_pre_oci_model import PreOCIModel
from data.registry_model.registry_oci_model import OCIModel
from storage.distributedstorage import DistributedStorage
from storage.fakestorage import FakeStorage
from test.fixtures import *
@pytest.fixture(params=[PreOCIModel, OCIModel])
def registry_model(request, initialized_db):
return request.param()
@pytest.fixture()
def fake_session():
with patch('data.registry_model.manifestbuilder.session', {}):
yield
@pytest.mark.parametrize('layers', [
pytest.param([('someid', None, 'some data')], id='Single layer'),
pytest.param([('parentid', None, 'some parent data'),
('someid', 'parentid', 'some data')],
id='Multi layer'),
])
def test_build_manifest(layers, fake_session, registry_model):
repository_ref = registry_model.lookup_repository('devtable', 'complex')
storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
settings = BlobUploadSettings('2M', 512 * 1024, 3600)
app_config = {'TESTING': True}
builder = create_manifest_builder(repository_ref, storage, docker_v2_signing_key)
assert lookup_manifest_builder(repository_ref, 'anotherid', storage,
docker_v2_signing_key) is None
assert lookup_manifest_builder(repository_ref, builder.builder_id, storage,
docker_v2_signing_key) is not None
blobs_by_layer = {}
for layer_id, parent_id, layer_bytes in layers:
# Start a new layer.
assert builder.start_layer(layer_id, json.dumps({'id': layer_id, 'parent': parent_id}),
'local_us', None, 60)
checksum = hashlib.sha1(layer_bytes).hexdigest()
# Assign it a blob.
with upload_blob(repository_ref, storage, settings) as uploader:
uploader.upload_chunk(app_config, BytesIO(layer_bytes))
blob = uploader.commit_to_blob(app_config)
blobs_by_layer[layer_id] = blob
builder.assign_layer_blob(builder.lookup_layer(layer_id), blob, [checksum])
# Validate the checksum.
assert builder.validate_layer_checksum(builder.lookup_layer(layer_id), checksum)
# Commit the manifest to a tag.
tag = builder.commit_tag_and_manifest('somenewtag', builder.lookup_layer(layers[-1][0]))
assert tag
assert tag in builder.committed_tags
# Mark the builder as done.
builder.done()
# Verify the legacy image for the tag.
found = registry_model.get_repo_tag(repository_ref, 'somenewtag', include_legacy_image=True)
assert found
assert found.name == 'somenewtag'
assert found.legacy_image.docker_image_id == layers[-1][0]
# Verify the blob and manifest.
manifest = registry_model.get_manifest_for_tag(found)
assert manifest
parsed = manifest.get_parsed_manifest()
assert len(list(parsed.layers)) == len(layers)
for index, (layer_id, parent_id, layer_bytes) in enumerate(layers):
assert list(parsed.blob_digests)[index] == blobs_by_layer[layer_id].digest
assert list(parsed.layers)[index].v1_metadata.image_id == layer_id
assert list(parsed.layers)[index].v1_metadata.parent_image_id == parent_id
assert parsed.leaf_layer_v1_image_id == layers[-1][0]
def test_build_manifest_missing_parent(fake_session, registry_model):
storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
repository_ref = registry_model.lookup_repository('devtable', 'complex')
builder = create_manifest_builder(repository_ref, storage, docker_v2_signing_key)
assert builder.start_layer('somelayer', json.dumps({'id': 'somelayer', 'parent': 'someparent'}),
'local_us', None, 60) is None