f75f315037
Currently, we only have one (the shared empty layer), but this should make the blob lookups for repositories significantly faster, as we won't need to do the massive join.
# pylint: disable=protected-access

import logging

from abc import abstractmethod
from collections import defaultdict

from data import database
from data import model
from data.cache import cache_key
from data.model.oci.retriever import RepositoryContentRetriever
from data.model.blob import get_shared_blob
from data.registry_model.datatype import FromDictionaryException
from data.registry_model.datatypes import (RepositoryReference, Blob, TorrentInfo, BlobUpload,
                                           LegacyImage, ManifestLayer, DerivedImage)
from image.docker.schema1 import ManifestException, DockerSchema1ManifestBuilder
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST

logger = logging.getLogger(__name__)


class SharedModel:
  """
  SharedModel implements those data model operations for the registry API that are unchanged
  between the old and new data models.
  """
  def lookup_repository(self, namespace_name, repo_name, kind_filter=None):
    """ Looks up and returns a reference to the repository with the given namespace and name,
        or None if none. """
    repo = model.repository.get_repository(namespace_name, repo_name, kind_filter=kind_filter)
    return RepositoryReference.for_repo_obj(repo)
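
  # Illustrative usage (hypothetical caller code, not part of this module): endpoints are
  # expected to resolve the repository reference first and treat a None result as a missing
  # repository before attempting any blob, tag or manifest operation, e.g.:
  #
  #   repository_ref = shared_model.lookup_repository('devtable', 'simple')
  #   if repository_ref is None:
  #     raise NotFound()
  #
  # The names `shared_model`, the namespace/repo pair and `NotFound` are assumptions made for
  # the sake of the example.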

  def is_existing_disabled_namespace(self, namespace_name):
    """ Returns whether the given namespace exists and is disabled. """
    namespace = model.user.get_namespace_user(namespace_name)
    return namespace is not None and not namespace.enabled

  def is_namespace_enabled(self, namespace_name):
    """ Returns whether the given namespace exists and is enabled. """
    namespace = model.user.get_namespace_user(namespace_name)
    return namespace is not None and namespace.enabled

  def get_derived_image_signature(self, derived_image, signer_name):
    """
    Returns the signature associated with the derived image and a specific signer or None if none.
    """
    try:
      derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
    except database.DerivedStorageForImage.DoesNotExist:
      return None

    storage = derived_storage.derivative
    signature_entry = model.storage.lookup_storage_signature(storage, signer_name)
    if signature_entry is None:
      return None

    return signature_entry.signature

  def set_derived_image_signature(self, derived_image, signer_name, signature):
    """
    Sets the calculated signature for the given derived image and signer to that specified.
    """
    try:
      derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
    except database.DerivedStorageForImage.DoesNotExist:
      return None

    storage = derived_storage.derivative
    signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name)
    signature_entry.signature = signature
    signature_entry.uploading = False
    signature_entry.save()

  def delete_derived_image(self, derived_image):
    """
    Deletes a derived image and all of its storage.
    """
    try:
      derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
    except database.DerivedStorageForImage.DoesNotExist:
      return None

    model.image.delete_derived_storage(derived_storage)

  def set_derived_image_size(self, derived_image, compressed_size):
    """
    Sets the compressed size on the given derived image.
    """
    try:
      derived_storage = database.DerivedStorageForImage.get(id=derived_image._db_id)
    except database.DerivedStorageForImage.DoesNotExist:
      return None

    storage_entry = derived_storage.derivative
    storage_entry.image_size = compressed_size
    storage_entry.uploading = False
    storage_entry.save()

  def get_torrent_info(self, blob):
    """
    Returns the torrent information associated with the given blob or None if none.
    """
    try:
      image_storage = database.ImageStorage.get(id=blob._db_id)
    except database.ImageStorage.DoesNotExist:
      return None

    try:
      torrent_info = model.storage.get_torrent_info(image_storage)
    except model.TorrentInfoDoesNotExist:
      return None

    return TorrentInfo.for_torrent_info(torrent_info)

  def set_torrent_info(self, blob, piece_length, pieces):
    """
    Sets the torrent information associated with the given blob to that specified.
    """
    try:
      image_storage = database.ImageStorage.get(id=blob._db_id)
    except database.ImageStorage.DoesNotExist:
      return None

    torrent_info = model.storage.save_torrent_info(image_storage, piece_length, pieces)
    return TorrentInfo.for_torrent_info(torrent_info)

  def get_cached_namespace_region_blacklist(self, model_cache, namespace_name):
    """ Returns a cached set of ISO country codes blacklisted for pulls for the namespace
        or None if the list could not be loaded.
    """

    def load_blacklist():
      restrictions = model.user.list_namespace_geo_restrictions(namespace_name)
      if restrictions is None:
        return None

      return [restriction.restricted_region_iso_code for restriction in restrictions]

    blacklist_cache_key = cache_key.for_namespace_geo_restrictions(namespace_name)
    result = model_cache.retrieve(blacklist_cache_key, load_blacklist)
    if result is None:
      return None

    return set(result)
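
  # Illustrative usage (hypothetical caller code; assumes a configured `model_cache` object
  # compatible with the retrieve() interface used above): a None result means the list could
  # not be loaded, which callers may want to treat differently from an empty set, e.g.:
  #
  #   blacklist = shared_model.get_cached_namespace_region_blacklist(model_cache, namespace)
  #   if blacklist is not None and request_country_code in blacklist:
  #     deny_pull()
  #
  # `request_country_code` and `deny_pull` are placeholders for the example only.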

  def get_cached_repo_blob(self, model_cache, namespace_name, repo_name, blob_digest):
    """
    Returns the blob in the repository with the given digest if any or None if none.
    Caches the result in the caching system.
    """
    def load_blob():
      repository_ref = self.lookup_repository(namespace_name, repo_name)
      if repository_ref is None:
        return None

      blob_found = self.get_repo_blob_by_digest(repository_ref, blob_digest,
                                                include_placements=True)
      if blob_found is None:
        return None

      return blob_found.asdict()

    blob_cache_key = cache_key.for_repository_blob(namespace_name, repo_name, blob_digest, 2)
    blob_dict = model_cache.retrieve(blob_cache_key, load_blob)

    try:
      return Blob.from_dict(blob_dict) if blob_dict is not None else None
    except FromDictionaryException:
      # The data was stale in some way. Simply reload.
      repository_ref = self.lookup_repository(namespace_name, repo_name)
      if repository_ref is None:
        return None

      return self.get_repo_blob_by_digest(repository_ref, blob_digest, include_placements=True)
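
  # Note (a non-authoritative reading of the code above): the trailing `2` passed to
  # cache_key.for_repository_blob appears to act as a version for the cached blob dictionary,
  # so a change to the serialized Blob shape can be rolled out by bumping it rather than by
  # manually invalidating entries. A minimal caller sketch (names are assumptions for the
  # example only):
  #
  #   blob = shared_model.get_cached_repo_blob(model_cache, namespace, repo, digest)
  #   if blob is None:
  #     raise BlobUnknown()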

  @abstractmethod
  def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
    pass

  def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata):
    """ Creates a new blob upload and returns a reference. If the blob upload could not be
        created, returns None. """
    repo = model.repository.lookup_repository(repository_ref._db_id)
    if repo is None:
      return None

    try:
      upload_record = model.blob.initiate_upload(repo.namespace_user.username, repo.name,
                                                 new_upload_id, location_name, storage_metadata)
      return BlobUpload.for_upload(upload_record)
    except database.Repository.DoesNotExist:
      return None

  def lookup_blob_upload(self, repository_ref, blob_upload_id):
    """ Looks up the blob upload with the given ID under the specified repository and returns it
        or None if none.
    """
    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload_id)
    if upload_record is None:
      return None

    return BlobUpload.for_upload(upload_record)

  def update_blob_upload(self, blob_upload, uncompressed_byte_count, piece_hashes, piece_sha_state,
                         storage_metadata, byte_count, chunk_count, sha_state):
    """ Updates the fields of the blob upload to match those given. Returns the updated blob upload
        or None if the record does not exist.
    """
    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
    if upload_record is None:
      return None

    upload_record.uncompressed_byte_count = uncompressed_byte_count
    upload_record.piece_hashes = piece_hashes
    upload_record.piece_sha_state = piece_sha_state
    upload_record.storage_metadata = storage_metadata
    upload_record.byte_count = byte_count
    upload_record.chunk_count = chunk_count
    upload_record.sha_state = sha_state
    upload_record.save()
    return BlobUpload.for_upload(upload_record)

  def delete_blob_upload(self, blob_upload):
    """ Deletes a blob upload record. """
    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
    if upload_record is not None:
      upload_record.delete_instance()

  def commit_blob_upload(self, blob_upload, blob_digest_str, blob_expiration_seconds):
    """ Commits the blob upload into a blob and sets an expiration before that blob will be GCed.
    """
    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
    if upload_record is None:
      return None

    repository = upload_record.repository
    namespace_name = repository.namespace_user.username
    repo_name = repository.name

    # Create the blob and temporarily tag it.
    location_obj = model.storage.get_image_location_for_name(blob_upload.location_name)
    blob_record = model.blob.store_blob_record_and_temp_link(
      namespace_name, repo_name, blob_digest_str, location_obj.id, blob_upload.byte_count,
      blob_expiration_seconds, blob_upload.uncompressed_byte_count)

    # Delete the blob upload.
    upload_record.delete_instance()
    return Blob.for_image_storage(blob_record,
                                  storage_path=model.storage.get_layer_path(blob_record))
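
  # A minimal sketch of the upload lifecycle implemented by the methods above (illustrative
  # caller code only; chunk streaming, hashing state and error handling are elided, and all
  # argument values are placeholders):
  #
  #   upload = shared_model.create_blob_upload(repository_ref, new_upload_id, 'local_us', {})
  #   upload = shared_model.update_blob_upload(upload, uncompressed_byte_count, piece_hashes,
  #                                            piece_sha_state, storage_metadata, byte_count,
  #                                            chunk_count, sha_state)
  #   blob = shared_model.commit_blob_upload(upload, 'sha256:<digest>', expiration_seconds)
  #
  # On failure, a caller would typically clean up with delete_blob_upload(upload) instead.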

  def mount_blob_into_repository(self, blob, target_repository_ref, expiration_sec):
    """
    Mounts the blob from another repository into the specified target repository, and adds an
    expiration before that blob is automatically GCed. This function is useful during push
    operations if an existing blob from another repository is being pushed. Returns False if
    the mounting fails.
    """
    repo = model.repository.lookup_repository(target_repository_ref._db_id)
    if repo is None:
      return False

    namespace_name = repo.namespace_user.username
    repo_name = repo.name

    storage = model.blob.temp_link_blob(namespace_name, repo_name, blob.digest,
                                        expiration_sec)
    return bool(storage)

  def get_legacy_images(self, repository_ref):
    """
    Returns an iterator of all the LegacyImages defined in the matching repository.
    """
    repo = model.repository.lookup_repository(repository_ref._db_id)
    if repo is None:
      return None

    all_images = model.image.get_repository_images_without_placements(repo)
    all_images_map = {image.id: image for image in all_images}

    all_tags = model.tag.list_repository_tags(repo.namespace_user.username, repo.name)
    tags_by_image_id = defaultdict(list)
    for tag in all_tags:
      tags_by_image_id[tag.image_id].append(tag)

    return [LegacyImage.for_image(image, images_map=all_images_map, tags_map=tags_by_image_id)
            for image in all_images]

  def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False,
                       include_blob=False):
    """
    Returns the matching LegacyImage under the matching repository, if any. If none,
    returns None.
    """
    repo = model.repository.lookup_repository(repository_ref._db_id)
    if repo is None:
      return None

    image = model.image.get_image(repository_ref._db_id, docker_image_id)
    if image is None:
      return None

    parent_images_map = None
    if include_parents:
      parent_images = model.image.get_parent_images(repo.namespace_user.username, repo.name, image)
      parent_images_map = {image.id: image for image in parent_images}

    blob = None
    if include_blob:
      placements = list(model.storage.get_storage_locations(image.storage.uuid))
      blob = Blob.for_image_storage(image.storage,
                                    storage_path=model.storage.get_layer_path(image.storage),
                                    placements=placements)

    return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob)

  def _get_manifest_local_blobs(self, manifest, repo_id, include_placements=False,
                                by_manifest=False):
    parsed = manifest.get_parsed_manifest()
    if parsed is None:
      return None

    local_blob_digests = list(set(parsed.local_blob_digests))
    if not local_blob_digests:
      return []

    blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, local_blob_digests,
                                                                by_manifest=by_manifest)
    blobs = []
    for image_storage in blob_query:
      placements = None
      if include_placements:
        placements = list(model.storage.get_storage_locations(image_storage.uuid))

      blob = Blob.for_image_storage(image_storage,
                                    storage_path=model.storage.get_layer_path(image_storage),
                                    placements=placements)
      blobs.append(blob)

    return blobs

  def _list_manifest_layers(self, repo_id, parsed, storage, include_placements=False,
                            by_manifest=False):
    """ Returns an *ordered list* of the layers found in the manifest, starting at the base and
        working towards the leaf, including the associated Blob and its placements (if specified).
        Returns None if the manifest could not be parsed and validated.
    """
    assert not parsed.is_manifest_list

    retriever = RepositoryContentRetriever(repo_id, storage)
    requires_empty_blob = parsed.get_requires_empty_layer_blob(retriever)

    storage_map = {}
    blob_digests = list(parsed.local_blob_digests)
    if requires_empty_blob:
      blob_digests.append(EMPTY_LAYER_BLOB_DIGEST)

    if blob_digests:
      blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, blob_digests,
                                                                  by_manifest=by_manifest)
      storage_map = {blob.content_checksum: blob for blob in blob_query}

    layers = parsed.get_layers(retriever)
    if layers is None:
      logger.error('Could not load layers for manifest `%s`', parsed.digest)
      return None

    manifest_layers = []
    for layer in layers:
      if layer.is_remote:
        manifest_layers.append(ManifestLayer(layer, None))
        continue

      digest_str = str(layer.blob_digest)
      if digest_str not in storage_map:
        logger.error('Missing digest `%s` for manifest `%s`', layer.blob_digest, parsed.digest)
        return None

      image_storage = storage_map[digest_str]
      assert image_storage.cas_path is not None
      assert image_storage.image_size is not None

      placements = None
      if include_placements:
        placements = list(model.storage.get_storage_locations(image_storage.uuid))

      blob = Blob.for_image_storage(image_storage,
                                    storage_path=model.storage.get_layer_path(image_storage),
                                    placements=placements)
      manifest_layers.append(ManifestLayer(layer, blob))

    return manifest_layers
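
  # Illustrative note on the empty-layer handling above (a reading of this code, not
  # documentation of external behavior): when the parsed manifest reports that it needs the
  # synthesized empty layer, EMPTY_LAYER_BLOB_DIGEST is appended to the digests looked up, so
  # those layers still resolve to a concrete Blob via the shared storage row instead of
  # tripping the `digest_str not in storage_map` check and returning None.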

  def _build_derived(self, derived, verb, varying_metadata, include_placements):
    if derived is None:
      return None

    derived_storage = derived.derivative
    placements = None
    if include_placements:
      placements = list(model.storage.get_storage_locations(derived_storage.uuid))

    blob = Blob.for_image_storage(derived_storage,
                                  storage_path=model.storage.get_layer_path(derived_storage),
                                  placements=placements)

    return DerivedImage.for_derived_storage(derived, verb, varying_metadata, blob)

  def _build_manifest_for_legacy_image(self, tag_name, legacy_image_row):
    import features

    from app import app, docker_v2_signing_key

    repo = legacy_image_row.repository
    namespace_name = repo.namespace_user.username
    repo_name = repo.name

    # Find the v1 metadata for this image and its parents.
    try:
      parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row)
    except model.DataModelException:
      logger.exception('Could not load parent images for legacy image %s', legacy_image_row.id)
      return None

    # If the manifest is being generated under the library namespace, then we make its namespace
    # empty.
    manifest_namespace = namespace_name
    if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']:
      manifest_namespace = ''

    # Create and populate the manifest builder.
    builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name)

    # Add the leaf layer.
    builder.add_layer(legacy_image_row.storage.content_checksum, legacy_image_row.v1_json_metadata)

    for parent_image in parents:
      builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata)

    # Sign the manifest with our signing key.
    return builder.build(docker_v2_signing_key)

  def _get_shared_storage(self, blob_digest):
    """ Returns an ImageStorage row for the blob digest if it is a globally shared storage. """
    # If the EMPTY_LAYER_BLOB_DIGEST is in the checksums, look it up directly. Since we have
    # so many duplicate copies in the database currently, looking it up bound to a repository
    # can be incredibly slow, and, since it is defined as a globally shared layer, this is extra
    # work we don't need to do.
    if blob_digest == EMPTY_LAYER_BLOB_DIGEST:
      return get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)

    return None
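
  # This is the hook the commit message at the top refers to: the shared empty layer is
  # currently the only globally shared storage, and resolving it here lets the lookup below
  # skip the repository-scoped join for that digest. Other globally shared storages could
  # plausibly be added by extending this check, but nothing in the current code implies that.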

  def _lookup_repo_storages_by_content_checksum(self, repo, checksums, by_manifest=False):
    # Load any shared storages first. Note that this mutates `checksums` in place, removing
    # any digest that was resolved as a shared storage.
    extra_storages = []
    for checksum in list(checksums):
      shared_storage = self._get_shared_storage(checksum)
      if shared_storage is not None:
        extra_storages.append(shared_storage)
        checksums.remove(checksum)

    found = []
    if checksums:
      found = list(model.storage.lookup_repo_storages_by_content_checksum(repo, checksums,
                                                                          by_manifest=by_manifest))
    return found + extra_storages
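
  # Minimal illustration of the contract above (an assumption-flagged sketch, not copied
  # verbatim from a caller): because `checksums` may be mutated, callers build a throwaway
  # list first, exactly as _get_manifest_local_blobs and _list_manifest_layers do, e.g.:
  #
  #   digests = list(parsed.local_blob_digests)
  #   storages = self._lookup_repo_storages_by_content_checksum(repo_id, digests)
  #   storage_map = {storage.content_checksum: storage for storage in storages}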