From e3a39d7bd6bcdb64761fe15e18d086e1410f7c03 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 15 Jun 2016 14:49:03 -0400 Subject: [PATCH 01/34] fix indentation --- data/model/tag.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/data/model/tag.py b/data/model/tag.py index 03fa30ec3..6139b5fff 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -344,9 +344,10 @@ def load_manifest_by_digest(namespace, repo_name, digest): def _load_repo_manifests(namespace, repo_name): - return _tag_alive(TagManifest - .select(TagManifest, RepositoryTag, Repository) - .join(RepositoryTag) - .join(Repository) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)) - .where(Repository.name == repo_name, Namespace.username == namespace)) + return _tag_alive(TagManifest + .select(TagManifest, RepositoryTag) + .join(RepositoryTag) + .join(Image) + .join(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) + .where(Repository.name == repo_name, Namespace.username == namespace)) From 4f95a814c007577cc6c25ba28a0df7c2a8b93970 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 6 Jul 2016 14:10:02 -0400 Subject: [PATCH 02/34] satisfy the pylint gods --- endpoints/v1/registry.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 5bcdd89aa..0d6f5236b 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -1,26 +1,27 @@ import logging import json -from flask import make_response, request, session, Response, redirect, abort as flask_abort from functools import wraps from datetime import datetime from time import time +from flask import make_response, request, session, Response, redirect, abort as flask_abort + from app import storage as store, app from auth.auth import process_auth, extract_namespace_repo_from_session from auth.auth_context import get_authenticated_user -from auth.registry_jwt_auth import get_granted_username -from digest import checksums -from util.http import abort, exact_abort -from util.registry.filelike import SocketReader from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) +from auth.registry_jwt_auth import get_granted_username from data import model, database -from util.registry import gzipstream -from util.registry.torrent import PieceHasher +from digest import checksums from endpoints.v1 import v1_bp from endpoints.decorators import anon_protect +from util.http import abort, exact_abort +from util.registry.filelike import SocketReader +from util.registry import gzipstream from util.registry.replication import queue_storage_replication +from util.registry.torrent import PieceHasher logger = logging.getLogger(__name__) @@ -38,7 +39,7 @@ def set_uploading_flag(repo_image, is_image_uploading): repo_image.storage.save() -def _finish_image(namespace, repository, repo_image): +def _finish_image(namespace, repo_image): # Checksum is ok, we remove the marker set_uploading_flag(repo_image, False) @@ -176,7 +177,7 @@ def put_image_layer(namespace, repository, image_id): logger.debug('Retrieving image data') uuid = repo_image.storage.uuid json_data = repo_image.v1_json_metadata - except (AttributeError): + except AttributeError: logger.exception('Exception when retrieving image data') abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) @@ -235,7 +236,8 @@ def put_image_layer(namespace, repository, image_id): size_info.compressed_size, 
size_info.uncompressed_size) pieces_bytes = piece_hasher.final_piece_hashes() - model.storage.save_torrent_info(updated_storage, app.config['BITTORRENT_PIECE_SIZE'], pieces_bytes) + model.storage.save_torrent_info(updated_storage, app.config['BITTORRENT_PIECE_SIZE'], + pieces_bytes) # Append the computed checksum. csums = [] @@ -266,7 +268,7 @@ def put_image_layer(namespace, repository, image_id): issue='checksum-mismatch', image_id=image_id) # Mark the image as uploaded. - _finish_image(namespace, repository, repo_image) + _finish_image(namespace, repo_image) return make_response('true', 200) @@ -330,7 +332,7 @@ def put_image_checksum(namespace, repository, image_id): issue='checksum-mismatch', image_id=image_id) # Mark the image as uploaded. - _finish_image(namespace, repository, repo_image) + _finish_image(namespace, repo_image) return make_response('true', 200) From 9cfd6ec452272521b56417b061d7563af350a3dc Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 15 Jun 2016 14:48:46 -0400 Subject: [PATCH 03/34] database: initial manifestlist schema changes --- data/database.py | 207 ++++++++++++++++++++++++++++++++++++++++++++--- pylintrc | 2 +- 2 files changed, 196 insertions(+), 13 deletions(-) diff --git a/data/database.py b/data/database.py index 935fad2a6..24598f29c 100644 --- a/data/database.py +++ b/data/database.py @@ -1,3 +1,5 @@ +# pylint: disable=old-style-class,no-init + import inspect import logging import string @@ -9,7 +11,6 @@ from collections import defaultdict from datetime import datetime from random import SystemRandom -import resumablehashlib import toposort from enum import Enum @@ -18,6 +19,8 @@ from playhouse.shortcuts import RetryOperationalError from sqlalchemy.engine.url import make_url +import resumablehashlib + from data.fields import ResumableSHA256Field, ResumableSHA1Field, JSONField, Base64BinaryField from data.read_slave import ReadSlaveModel from util.names import urn_generator @@ -115,11 +118,11 @@ def delete_instance_filtered(instance, model_class, delete_nullable, skip_transi with db_transaction(): for query, fk in filtered_ops: - model = fk.model_class + _model = fk.model_class if fk.null and not delete_nullable: - model.update(**{fk.name: None}).where(query).execute() + _model.update(**{fk.name: None}).where(query).execute() else: - model.delete().where(query).execute() + _model.delete().where(query).execute() return instance.delete().where(instance._pk_expr()).execute() @@ -144,12 +147,12 @@ class CloseForLongOperation(object): self.config_object = config_object def __enter__(self): - if self.config_object.get('TESTING') == True: + if self.config_object.get('TESTING') is True: return close_db_filter(None) - def __exit__(self, type, value, traceback): + def __exit__(self, typ, value, traceback): # Note: Nothing to do. The next SQL call will reconnect automatically. 
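     # (Reconnection is handled lazily by the retrying database class; see the
     # RetryOperationalError import above.)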
pass @@ -163,7 +166,7 @@ class UseThenDisconnect(object): def __enter__(self): configure(self.config_object) - def __exit__(self, type, value, traceback): + def __exit__(self, typ, value, traceback): close_db_filter(None) @@ -295,7 +298,7 @@ class QuayUserField(ForeignKeyField): def __init__(self, allows_robots=False, robot_null_delete=False, *args, **kwargs): self.allows_robots = allows_robots self.robot_null_delete = robot_null_delete - if not 'rel_model' in kwargs: + if 'rel_model' not in kwargs: kwargs['rel_model'] = User super(QuayUserField, self).__init__(*args, **kwargs) @@ -341,12 +344,12 @@ class User(BaseModel): # For all the model dependencies, only delete those that allow robots. for query, fk in reversed(list(self.dependencies(search_nullable=True))): if isinstance(fk, QuayUserField) and fk.allows_robots: - model = fk.model_class + _model = fk.model_class if fk.robot_null_delete: - model.update(**{fk.name: None}).where(query).execute() + _model.update(**{fk.name: None}).where(query).execute() else: - model.delete().where(query).execute() + _model.delete().where(query).execute() # Delete the instance itself. super(User, self).delete_instance(recursive=False, delete_nullable=False) @@ -494,7 +497,7 @@ class PermissionPrototype(BaseModel): uuid = CharField(default=uuid_generator) activating_user = QuayUserField(allows_robots=True, index=True, null=True, related_name='userpermissionproto') - delegate_user = QuayUserField(allows_robots=True,related_name='receivingpermission', + delegate_user = QuayUserField(allows_robots=True, related_name='receivingpermission', null=True) delegate_team = ForeignKeyField(Team, related_name='receivingpermission', null=True) @@ -988,6 +991,186 @@ class TagManifestLabel(BaseModel): (('annotated', 'label'), True), ) +class Blob(BaseModel): + """ Blob represents a content-addressable object stored outside of the database. """ + digest = CharField(index=True, unique=True) + media_type = ForeignKeyField(MediaType) + size = BigIntegerField() + uncompressed_size = BigIntegerField(null=True) + + +class BlobPlacementLocation(BaseModel): + """ BlobPlacementLocation is an enumeration of the possible storage locations for Blobs. """ + name = CharField(index=True, unique=True) + + +class BlobPlacementLocationPreference(BaseModel): + """ BlobPlacementLocationPreference is a location to which a user's data will be replicated. """ + user = QuayUserField(index=True, allow_robots=False) + location = ForeignKeyField(BlobPlacementLocation) + + +class BlobPlacement(BaseModel): + """ BlobPlacement represents the location of a Blob. """ + blob = ForeignKeyField(Blob) + location = ForeignKeyField(BlobPlacementLocation) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('blob', 'location'), True), + ) + + +class BlobUploading(BaseModel): + """ BlobUploading represents the state of a Blob currently being uploaded. 
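+      The resumable sha_state and piece_sha_state fields checkpoint the in-progress
+      hashes so an interrupted upload can be resumed chunk by chunk.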
""" + uuid = CharField(index=True, unique=True) + created = DateTimeField(default=datetime.now, index=True) + repository = ForeignKeyField(Repository, index=True) + location = ForeignKeyField(BlobPlacementLocation) + byte_count = IntegerField(default=0) + sha_state = ResumableSHA256Field(null=True, default=resumablehashlib.sha256) + storage_metadata = JSONField(null=True, default={}) + chunk_count = IntegerField(default=0) + uncompressed_byte_count = IntegerField(null=True) + piece_sha_state = ResumableSHA1Field(null=True) + piece_hashes = Base64BinaryField(null=True) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('repository', 'uuid'), True), + ) + + +class Manifest(BaseModel): + """ Manifest represents the metadata and collection of blobs that comprise a container image. """ + digest = CharField(index=True, unique=True) + media_type = ForeignKeyField(MediaType) + manifest_json = JSONField() + + +class ManifestBlob(BaseModel): + """ ManifestBlob is a many-to-many relation table linking Manifests and Blobs. """ + manifest = ForeignKeyField(Manifest, index=True) + blob = ForeignKeyField(Blob, index=True) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest', 'blob'), True), + ) + + +class ManifestList(BaseModel): + """ ManifestList represents all of the various manifests that compose a Tag. """ + digest = CharField(index=True, unique=True) + manifest_list_json = JSONField() + schema_version = CharField() + media_type = ForeignKeyField(MediaType) + + +class ManifestListManifest(BaseModel): + """ ManifestListManifest is a many-to-many relation table linking ManifestLists and Manifests. """ + manifest_list = ForeignKeyField(ManifestList, index=True) + manifest = ForeignKeyField(Manifest, index=True) + operating_system = CharField() + architecture = CharField() + platform_json = JSONField() + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest_list', 'operating_system', 'architecture'), False), + ) + + +class ManifestLayer(BaseModel): + """ ManifestLayer represents one of the layers that compose a Manifest. """ + blob = ForeignKeyField(Blob, index=True) + manifest = ForeignKeyField(Manifest) + manifest_index = IntegerField(index=True) # index 0 is the last command in a Dockerfile + metadata_json = JSONField() + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest', 'manifest_index'), True), + ) + + +class ManifestLayerDockerV1(BaseModel): + """ ManifestLayerDockerV1 is the Docker v1 registry protocol metadata for a ManifestLayer. """ + manifest_layer = ForeignKeyField(ManifestLayer) + image_id = CharField(index=True) + checksum = CharField() + compat_json = JSONField() + + +class ManifestLayerScan(BaseModel): + """ ManifestLayerScan represents the state of security scanning for a ManifestLayer. """ + layer = ForeignKeyField(ManifestLayer, unique=True) + scannable = BooleanField() + scanned_by = CharField() + + +class DerivedImage(BaseModel): + """ DerivedImage represents a Manifest transcoded into an alternative format. 
""" + source_manifest = ForeignKeyField(Manifest) + derived_manifest_json = JSONField() + media_type = ForeignKeyField(MediaType) + blob = ForeignKeyField(Blob) + uniqueness_hash = CharField(index=True, unique=True) + signature_blob = ForeignKeyField(Blob, null=True) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('source_manifest', 'blob'), True), + (('source_manifest', 'media_type', 'uniqueness_hash'), True), + ) + + +class Tag(BaseModel): + """ Tag represents a user-facing alias for referencing a ManifestList. """ + name = CharField() + repository = ForeignKeyField(Repository) + manifest_list = ForeignKeyField(ManifestList) + lifetime_start = IntegerField(default=get_epoch_timestamp) + lifetime_end = IntegerField(null=True, index=True) + hidden = BooleanField(default=False) + reverted = BooleanField(default=False) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('repository', 'name'), False), + + # This unique index prevents deadlocks when concurrently moving and deleting tags + (('repository', 'name', 'lifetime_end_ts'), True), + ) + + +class BitTorrentPieces(BaseModel): + """ BitTorrentPieces represents the BitTorrent piece metadata calculated from a Blob. """ + blob = ForeignKeyField(Blob) + pieces = Base64BinaryField() + piece_length = IntegerField() + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('annotated', 'label'), True), + ) + is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)] diff --git a/pylintrc b/pylintrc index 7cecd0de7..123b4692d 100644 --- a/pylintrc +++ b/pylintrc @@ -9,7 +9,7 @@ # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=missing-docstring,invalid-name,too-many-locals +disable=missing-docstring,invalid-name,too-many-locals,too-few-public-methods,too-many-lines [TYPECHECK] From c14437e54a6c3dfe42aa5deee33a2a94bcbbedfa Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Mon, 11 Jul 2016 18:51:28 -0400 Subject: [PATCH 04/34] initial v1 refactor to use model methods --- data/model/v1/__init__.py | 126 +++++++++++++++++++++++ endpoints/v1/registry.py | 208 ++++++++++++++------------------------ 2 files changed, 201 insertions(+), 133 deletions(-) create mode 100644 data/model/v1/__init__.py diff --git a/data/model/v1/__init__.py b/data/model/v1/__init__.py new file mode 100644 index 000000000..fb156bd00 --- /dev/null +++ b/data/model/v1/__init__.py @@ -0,0 +1,126 @@ +from app import app, storage as store +from data import model +from util.morecollections import AttrDict + + +# TODO(jzelinskie): implement all of these methods using both legacy and new models. 
+ +def blob_placement_locations_docker_v1(namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) + if repo_image is None: + return None + return repo_image.storage.locations + + +def blob_placement_locations_and_path_docker_v1(namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) + if not repo_image: + return None, None + return model.storage.get_layer_path(repo_image.storage), repo_image.storage.locations + + +def docker_v1_metadata(namespace_name, repo_name, image_id): + if not repo_image: + return None + + return AttrDict({ + 'namespace_name': namespace_name, + 'repo_name': repo_name, + 'image_id': image_id, + 'checksum': repo_image.v1_checksum, + 'compat_json': repo_image.v1_json_metadata, + }) + + +def update_docker_v1_metadata(namespace_name, repo_name, image_id, created_date_str, comment, + command, compat_json, parent_image_id=None): + # Old implementation: + # parent_image = get_repo_extended(namespace_name, repo_name, parent_image_id) + # model.image.set_image_metadata(image_id, namespace_name, repo_name, create_date_str, comment, command, compat_json, parent_image) + pass + + +def storage_exists(namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) + try: + layer_path = store.v1_image_layer_path(repo_image.storage.uuid) + except AttributeError: + return False + + if (store.exists(repo_image.storage.locations, layer_path) and not + repo_image.storage.uploading): + return True + return False + + +def store_docker_v1_checksum(namespace_name, repo_name, image_id, checksum, content_checksum): + ## Old implementation: + # UPDATE repo_image.storage.content_checksum = content_checksum + # UPDATE repo_image.v1_checksum = checksum + pass + + +def is_image_uploading(namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) + if repo_image is None: + return False + return repo_image.storage.uploading + + +def update_image_uploading(namespace_name, repo_name, image_id, is_uploading): + ## Old implementation: + # UPDATE repo_image.storage.uploading = is_uploading + pass + + +def update_image_size(namespace_name, repo_name, image_id, size, uncompressed_size): + model.storage.set_image_storage_metadata( + image_id, + namespace_name, + repo_name, + size, + uncompressed_size, + ) + + +def image_size(namespace_name, repo_name, image_id): + return repo_image.storage.image_size + + +def create_bittorrent_pieces(namespace_name, repo_name, image_id, pieces_bytes): + repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) + try: + model.storage.save_torrent_info( + repo_image.storage, + app.config['BITTORRENT_PIECE_SIZE'], + pieces_bytes + ) + except AttributeError: + pass + + +def image_ancestry(namespace_name, repo_name, image_id): + try: + image = model.image.get_image_by_id(namespace, repository, image_id) + except model.InvalidImageException: + return None + + parents = model.image.get_parent_images(namespace, repository, image) + ancestry_docker_ids = [image.docker_image_id] + ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) + + +def repository_exists(namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + return repo is not None + + +def create_or_link_image(username, repo_name, image_id, storage_location): + pass + + 
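+# create_or_link_image above was this code, in endpoints/v1/registry.py:
+#   repo = model.repository.get_repository(namespace, repository)
+#   repo_image = model.image.find_create_or_link_image(image_id, repo, username, {},
+#                                                      store.preferred_locations[0])
+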
+def create_temp_hidden_tag(namespace_name, repo_name, expiration): + # was this code: + # model.tag.create_temporary_hidden_tag(repo, repo_image, + # app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) + pass diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 0d6f5236b..d30b01743 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -27,23 +27,12 @@ from util.registry.torrent import PieceHasher logger = logging.getLogger(__name__) -def image_is_uploading(repo_image): - if repo_image is None: - return False - - return repo_image.storage.uploading - - -def set_uploading_flag(repo_image, is_image_uploading): - repo_image.storage.uploading = is_image_uploading - repo_image.storage.save() - - -def _finish_image(namespace, repo_image): +def _finish_image(namespace, repository, image_id): # Checksum is ok, we remove the marker - set_uploading_flag(repo_image, False) + update_image_uploading(namespace, repository, image_id, False) # Send a job to the work queue to replicate the image layer. + # TODO(jzelinskie): make this not use imagestorage queue_storage_replication(namespace, repo_image.storage) @@ -52,11 +41,9 @@ def require_completion(f): @wraps(f) def wrapper(namespace, repository, *args, **kwargs): image_id = kwargs['image_id'] - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if image_is_uploading(repo_image): + if is_image_uploading(namespace, repository, image_id): abort(400, 'Image %(image_id)s is being uploaded, retry later', - issue='upload-in-progress', image_id=kwargs['image_id']) - + issue='upload-in-progress', image_id=image_id) return f(namespace, repository, *args, **kwargs) return wrapper @@ -96,18 +83,17 @@ def head_image_layer(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') if permission.can() or model.repository.repository_is_public(namespace, repository): - logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image: - logger.debug('Image not found') + logger.debug('Looking up blob placement locations') + locations = blob_placement_locations_docker_v1(namespace, repository, image_id) + if locations is None: + logger.debug('Could not find any blob placement locations') abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - extra_headers = {} - # Add the Accept-Ranges header if the storage engine supports resumable # downloads. 
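  # (Range requests let clients resume interrupted layer downloads.)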
- if store.get_supports_resumable_downloads(repo_image.storage.locations): + extra_headers = {} + if store.get_supports_resumable_downloads(locations): logger.debug('Storage supports resumable downloads') extra_headers['Accept-Ranges'] = 'bytes' @@ -130,29 +116,23 @@ def get_image_layer(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') if permission.can() or model.repository.repository_is_public(namespace, repository): - logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image: - logger.debug('Image not found') + logger.debug('Looking up blob placement locations and path') + locations, path = blob_placement_locations_and_path_docker_v1(namespace, repository, image_id) + if not locations or not path: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - try: - path = model.storage.get_layer_path(repo_image.storage) logger.debug('Looking up the direct download URL for path: %s', path) - direct_download_url = store.get_direct_download_url(repo_image.storage.locations, path) - + direct_download_url = store.get_direct_download_url(locations, path) if direct_download_url: logger.debug('Returning direct download URL') resp = redirect(direct_download_url) return resp - logger.debug('Streaming layer data') - # Close the database handle here for this process before we send the long download. database.close_db_filter(None) - - return Response(store.stream_read(repo_image.storage.locations, path), headers=headers) + logger.debug('Streaming layer data') + return Response(store.stream_read(locations, path), headers=headers) except (IOError, AttributeError): logger.exception('Image layer data not found') abort(404, 'Image %(image_id)s not found', issue='unknown-image', @@ -172,21 +152,7 @@ def put_image_layer(namespace, repository, image_id): abort(403) logger.debug('Retrieving image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - try: - logger.debug('Retrieving image data') - uuid = repo_image.storage.uuid - json_data = repo_image.v1_json_metadata - except AttributeError: - logger.exception('Exception when retrieving image data') - abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - - uuid = repo_image.storage.uuid - layer_path = store.v1_image_layer_path(uuid) - logger.info('Storing layer at v1 path: %s', layer_path) - - if (store.exists(repo_image.storage.locations, layer_path) and not - image_is_uploading(repo_image)): + if storage_exists_docker_v1(namespace, repository, image_id): exact_abort(409, 'Image already exists') logger.debug('Storing layer data') @@ -216,7 +182,8 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(piece_hasher.update) # Add a handler which computes the checksum. - h, sum_hndlr = checksums.simple_checksum_handler(json_data) + v1_metadata = docker_v1_metadata(namespace, repository, image_id) + h, sum_hndlr = checksums.simple_checksum_handler(v1_metadata.compat_json) sr.add_handler(sum_hndlr) # Add a handler which computes the content checksum only @@ -224,20 +191,19 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(content_sum_hndlr) # Stream write the data to storage. 
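  # (Every handler registered on the SocketReader above sees each chunk as it streams through.)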
+ locations, path = blob_placement_locations_and_path_docker_v1(namespace, repository, image_id) with database.CloseForLongOperation(app.config): try: - store.stream_write(repo_image.storage.locations, layer_path, sr) + store.stream_write(locations, path, sr) except IOError: logger.exception('Exception when writing image data') abort(520, 'Image %(image_id)s could not be written. Please try again.', image_id=image_id) # Save the size of the image. - updated_storage = model.storage.set_image_storage_metadata(image_id, namespace, repository, - size_info.compressed_size, - size_info.uncompressed_size) - pieces_bytes = piece_hasher.final_piece_hashes() - model.storage.save_torrent_info(updated_storage, app.config['BITTORRENT_PIECE_SIZE'], - pieces_bytes) + update_image_size(namespace, repository, image_id, size_info.compressed_size, size_info.uncompressed_size) + + # Save the BitTorrent pieces. + create_bittorrent_pieces(namespace, repository, image_id, piece_hasher.final_piece_hashes()) # Append the computed checksum. csums = [] @@ -246,29 +212,27 @@ def put_image_layer(namespace, repository, image_id): try: if requires_tarsum: tmp.seek(0) - csums.append(checksums.compute_tarsum(tmp, json_data)) + csums.append(checksums.compute_tarsum(tmp, v1_metadata.compat_json)) tmp.close() - except (IOError, checksums.TarError) as exc: logger.debug('put_image_layer: Error when computing tarsum %s', exc) - if repo_image.v1_checksum is None: + v1_metadata = docker_v1_metadata(namespace, repository, image_id) + if v1_metadata.checksum is None: # We don't have a checksum stored yet, that's fine skipping the check. # Not removing the mark though, image is not downloadable yet. session['checksum'] = csums session['content_checksum'] = 'sha256:{0}'.format(ch.hexdigest()) return make_response('true', 200) - checksum = repo_image.v1_checksum - # We check if the checksums provided matches one the one we computed - if checksum not in csums: + if v1_metadata.checksum not in csums: logger.warning('put_image_layer: Wrong checksum') abort(400, 'Checksum mismatch; ignoring the layer for image %(image_id)s', issue='checksum-mismatch', image_id=image_id) # Mark the image as uploaded. 
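  # (_finish_image clears the uploading flag and queues replication of the stored layer.)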
- _finish_image(namespace, repo_image) + _finish_image(namespace, repository, image_id) return make_response('true', 200) @@ -305,24 +269,27 @@ def put_image_checksum(namespace, repository, image_id): issue='missing-checksum-cookie', image_id=image_id) logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image or not repo_image.storage: + v1_metadata = docker_v1_metadata(namespace_name, repo_name, image_id) + if not v1_metadata: abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Looking up repo layer data') - if not repo_image.v1_json_metadata: + if not v1_metadata.compat_json: abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Marking image path') - if not image_is_uploading(repo_image): + if not is_image_uploading(namespace, repository, image_id): abort(409, 'Cannot set checksum for image %(image_id)s', issue='image-write-error', image_id=image_id) logger.debug('Storing image and content checksums') + content_checksum = session.get('content_checksum', None) - err = store_checksum(repo_image, checksum, content_checksum) - if err: - abort(400, err) + checksum_parts = checksum.split(':') + if len(checksum_parts) != 2: + abort(400, 'Invalid checksum format') + + store_docker_v1_checksum(namespace, repository, image_id, checksum, content_checksum) if checksum not in session.get('checksum', []): logger.debug('session checksums: %s', session.get('checksum', [])) @@ -332,7 +299,7 @@ def put_image_checksum(namespace, repository, image_id): issue='checksum-mismatch', image_id=image_id) # Mark the image as uploaded. - _finish_image(namespace, repo_image) + _finish_image(namespace, repository, image_id) return make_response('true', 200) @@ -350,18 +317,18 @@ def get_image_json(namespace, repository, image_id, headers): abort(403) logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if repo_image is None: + v1_metadata = docker_v1_metadata(namespace_name, repo_name, image_id) + if v1_metadata is None: flask_abort(404) logger.debug('Looking up repo layer size') - size = repo_image.storage.image_size + size = image_size(namespace_name, repo_name, image_id) if size is not None: # Note: X-Docker-Size is optional and we *can* end up with a NULL image_size, # so handle this case rather than failing. 
headers['X-Docker-Size'] = str(size) - response = make_response(repo_image.v1_json_metadata, 200) + response = make_response(v1_metadata.compat_json, 200) response.headers.extend(headers) return response @@ -378,35 +345,16 @@ def get_image_ancestry(namespace, repository, image_id, headers): if not permission.can() and not model.repository.repository_is_public(namespace, repository): abort(403) - try: - image = model.image.get_image_by_id(namespace, repository, image_id) - except model.InvalidImageException: + ancestry_docker_ids = image_ancestry(namespace, repository, image_id) + if ancestry_docker_ids is None: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - parents = model.image.get_parent_images(namespace, repository, image) - - ancestry_docker_ids = [image.docker_image_id] - ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) - # We can not use jsonify here because we are returning a list not an object response = make_response(json.dumps(ancestry_docker_ids), 200) response.headers.extend(headers) return response -def store_checksum(image_with_storage, checksum, content_checksum): - checksum_parts = checksum.split(':') - if len(checksum_parts) != 2: - return 'Invalid checksum format' - - # We store the checksum - image_with_storage.storage.content_checksum = content_checksum - image_with_storage.storage.save() - - image_with_storage.v1_checksum = checksum - image_with_storage.save() - - @v1_bp.route('/images//json', methods=['PUT']) @process_auth @extract_namespace_repo_from_session @@ -419,8 +367,8 @@ def put_image_json(namespace, repository, image_id): logger.debug('Parsing image JSON') try: - v1_metadata = request.data - data = json.loads(v1_metadata.decode('utf8')) + uploaded_metadata = request.data + data = json.loads(uploaded_metadata.decode('utf8')) except ValueError: pass @@ -432,48 +380,42 @@ def put_image_json(namespace, repository, image_id): abort(400, 'Missing key `id` in JSON for image: %(image_id)s', issue='invalid-request', image_id=image_id) - logger.debug('Looking up repo image') - - repo = model.repository.get_repository(namespace, repository) - if repo is None: - abort(404, 'Repository does not exist: %(namespace)s/%(repository)s', issue='no-repo', - namespace=namespace, repository=repository) - - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image: - username = get_authenticated_user() and get_authenticated_user().username - if not username: - username = get_granted_username() - - logger.debug('Image not found, creating image with initiating user context: %s', username) - repo_image = model.image.find_create_or_link_image(image_id, repo, username, {}, - store.preferred_locations[0]) - - # Create a temporary tag to prevent this image from getting garbage collected while the push - # is in progress. 
- model.tag.create_temporary_hidden_tag(repo, repo_image, - app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) - if image_id != data['id']: abort(400, 'JSON data contains invalid id for image: %(image_id)s', issue='invalid-request', image_id=image_id) - parent_id = data.get('parent', None) + logger.debug('Looking up repo image') - parent_image = None + if not repository_exists(namespace, repository): + abort(404, 'Repository does not exist: %(namespace)s/%(repository)s', issue='no-repo', + namespace=namespace, repository=repository) + + v1_metadata = docker_v1_metadata(namespace, repository, image_id) + if v1_metadata is None: + username = get_authenticated_user() and get_authenticated_user().username + if not username: + username = get_granted_username() + + logger.debug('Image not found, creating or linking image with initiating user context: %s', username) + create_or_link_image(username, repository, image_id, store.preferred_locations[0]) + v1_metadata = docker_v1_metadata(namespace, repository, image_id) + + # Create a temporary tag to prevent this image from getting garbage collected while the push + # is in progress. + create_temp_hidden_tag(namespace_name, repo_name, app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) + + parent_id = data.get('parent', None) if parent_id: logger.debug('Looking up parent image') - parent_image = model.image.get_repo_image_extended(namespace, repository, parent_id) - - if not parent_image or parent_image.storage.uploading: + if docker_v1_metadata(namespace, repository, parent_id) is None: abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', issue='invalid-request', image_id=image_id, parent_id=parent_id) logger.debug('Checking if image already exists') - if repo_image.v1_json_metadata and not image_is_uploading(repo_image): + if v1_metadata and not is_image_uploading(namespace, repository, image_id): exact_abort(409, 'Image already exists') - set_uploading_flag(repo_image, True) + update_image_uploading(namespace, repository, image_id, True) # If we reach that point, it means that this is a new image or a retry # on a failed push, save the metadata @@ -481,8 +423,8 @@ def put_image_json(namespace, repository, image_id): command = json.dumps(command_list) if command_list else None logger.debug('Setting image metadata') - model.image.set_image_metadata(image_id, namespace, repository, data.get('created'), - data.get('comment'), command, v1_metadata, parent_image) + update_docker_v1_metadata(namespace, repository, image_id, data.get('created'), + data.get('comment'), command, uploaded_metadata, parent_image) return make_response('true', 200) From 8435c254c39009787c11e0984a090f36a1c02f44 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 12 Jul 2016 13:48:44 -0400 Subject: [PATCH 05/34] finish v1 registry refactor --- data/model/{v1/__init__.py => v1.py} | 28 +++++++++++-- endpoints/v1/registry.py | 60 ++++++++++++++-------------- endpoints/v1/tag.py | 19 ++++----- 3 files changed, 65 insertions(+), 42 deletions(-) rename data/model/{v1/__init__.py => v1.py} (80%) diff --git a/data/model/v1/__init__.py b/data/model/v1.py similarity index 80% rename from data/model/v1/__init__.py rename to data/model/v1.py index fb156bd00..fd934b3df 100644 --- a/data/model/v1/__init__.py +++ b/data/model/v1.py @@ -5,14 +5,14 @@ from util.morecollections import AttrDict # TODO(jzelinskie): implement all of these methods using both legacy and new models. 
-def blob_placement_locations_docker_v1(namespace_name, repo_name, image_id): +def placement_locations_docker_v1(namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) if repo_image is None: return None return repo_image.storage.locations -def blob_placement_locations_and_path_docker_v1(namespace_name, repo_name, image_id): +def placement_locations_and_path_docker_v1(namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) if not repo_image: return None, None @@ -73,7 +73,7 @@ def update_image_uploading(namespace_name, repo_name, image_id, is_uploading): pass -def update_image_size(namespace_name, repo_name, image_id, size, uncompressed_size): +def update_image_sizes(namespace_name, repo_name, image_id, size, uncompressed_size): model.storage.set_image_storage_metadata( image_id, namespace_name, @@ -83,7 +83,7 @@ def update_image_size(namespace_name, repo_name, image_id, size, uncompressed_si ) -def image_size(namespace_name, repo_name, image_id): +def get_image_size(namespace_name, repo_name, image_id): return repo_image.storage.image_size @@ -124,3 +124,23 @@ def create_temp_hidden_tag(namespace_name, repo_name, expiration): # model.tag.create_temporary_hidden_tag(repo, repo_image, # app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) pass + + +def list_tags(namespace_name, repo_name): + return model.tag.list_repository_tags(namespace_name, repo_name) + + +def create_or_update_tag(namespace_name, repo_name, image_id, tag_name): + model.tag.create_or_update_tag(namespace_name, repo_name, tag_name, image_id) + + +def find_image_id_by_tag(namespace_name, repo_name, tag_name): + try: + tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name) + except model.DataModelException: + return None + return tag_image.docker_image_id + + +def delete_tag(namespace_name, repo_name, tag_name): + model.tag.delete_tag(namespace_name, repo_name, tag_name) diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index d30b01743..99c2e8f5e 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -14,6 +14,7 @@ from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from auth.registry_jwt_auth import get_granted_username from data import model, database +from data.model import v1 from digest import checksums from endpoints.v1 import v1_bp from endpoints.decorators import anon_protect @@ -29,7 +30,7 @@ logger = logging.getLogger(__name__) def _finish_image(namespace, repository, image_id): # Checksum is ok, we remove the marker - update_image_uploading(namespace, repository, image_id, False) + v1.update_image_uploading(namespace, repository, image_id, False) # Send a job to the work queue to replicate the image layer. 
# TODO(jzelinskie): make this not use imagestorage @@ -41,7 +42,7 @@ def require_completion(f): @wraps(f) def wrapper(namespace, repository, *args, **kwargs): image_id = kwargs['image_id'] - if is_image_uploading(namespace, repository, image_id): + if v1.is_image_uploading(namespace, repository, image_id): abort(400, 'Image %(image_id)s is being uploaded, retry later', issue='upload-in-progress', image_id=image_id) return f(namespace, repository, *args, **kwargs) @@ -83,8 +84,8 @@ def head_image_layer(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') if permission.can() or model.repository.repository_is_public(namespace, repository): - logger.debug('Looking up blob placement locations') - locations = blob_placement_locations_docker_v1(namespace, repository, image_id) + logger.debug('Looking up placement locations') + locations = v1.placement_locations_docker_v1(namespace, repository, image_id) if locations is None: logger.debug('Could not find any blob placement locations') abort(404, 'Image %(image_id)s not found', issue='unknown-image', @@ -116,8 +117,10 @@ def get_image_layer(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') if permission.can() or model.repository.repository_is_public(namespace, repository): - logger.debug('Looking up blob placement locations and path') - locations, path = blob_placement_locations_and_path_docker_v1(namespace, repository, image_id) + logger.debug('Looking up placement locations and path') + locations, path = v1.placement_locations_and_path_docker_v1(namespace, + repository, + image_id) if not locations or not path: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) @@ -152,7 +155,7 @@ def put_image_layer(namespace, repository, image_id): abort(403) logger.debug('Retrieving image') - if storage_exists_docker_v1(namespace, repository, image_id): + if v1.storage_exists(namespace, repository, image_id): exact_abort(409, 'Image already exists') logger.debug('Storing layer data') @@ -182,7 +185,7 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(piece_hasher.update) # Add a handler which computes the checksum. - v1_metadata = docker_v1_metadata(namespace, repository, image_id) + v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) h, sum_hndlr = checksums.simple_checksum_handler(v1_metadata.compat_json) sr.add_handler(sum_hndlr) @@ -191,7 +194,7 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(content_sum_hndlr) # Stream write the data to storage. - locations, path = blob_placement_locations_and_path_docker_v1(namespace, repository, image_id) + locations, path = v1.placement_locations_and_path_docker_v1(namespace, repository, image_id) with database.CloseForLongOperation(app.config): try: store.stream_write(locations, path, sr) @@ -200,10 +203,11 @@ def put_image_layer(namespace, repository, image_id): abort(520, 'Image %(image_id)s could not be written. Please try again.', image_id=image_id) # Save the size of the image. - update_image_size(namespace, repository, image_id, size_info.compressed_size, size_info.uncompressed_size) + v1.update_image_sizes(namespace, repository, image_id, size_info.compressed_size, + size_info.uncompressed_size) # Save the BitTorrent pieces. - create_bittorrent_pieces(namespace, repository, image_id, piece_hasher.final_piece_hashes()) + v1.create_bittorrent_pieces(namespace, repository, image_id, piece_hasher.final_piece_hashes()) # Append the computed checksum. 
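  # (Always the simple sha256 of the payload; the tarsum as well when the client requires it.)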
csums = [] @@ -217,7 +221,6 @@ def put_image_layer(namespace, repository, image_id): except (IOError, checksums.TarError) as exc: logger.debug('put_image_layer: Error when computing tarsum %s', exc) - v1_metadata = docker_v1_metadata(namespace, repository, image_id) if v1_metadata.checksum is None: # We don't have a checksum stored yet, that's fine skipping the check. # Not removing the mark though, image is not downloadable yet. @@ -269,7 +272,7 @@ def put_image_checksum(namespace, repository, image_id): issue='missing-checksum-cookie', image_id=image_id) logger.debug('Looking up repo image') - v1_metadata = docker_v1_metadata(namespace_name, repo_name, image_id) + v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) if not v1_metadata: abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) @@ -278,7 +281,7 @@ def put_image_checksum(namespace, repository, image_id): abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Marking image path') - if not is_image_uploading(namespace, repository, image_id): + if not v1.is_image_uploading(namespace, repository, image_id): abort(409, 'Cannot set checksum for image %(image_id)s', issue='image-write-error', image_id=image_id) @@ -289,7 +292,7 @@ def put_image_checksum(namespace, repository, image_id): if len(checksum_parts) != 2: abort(400, 'Invalid checksum format') - store_docker_v1_checksum(namespace, repository, image_id, checksum, content_checksum) + v1.store_docker_v1_checksum(namespace, repository, image_id, checksum, content_checksum) if checksum not in session.get('checksum', []): logger.debug('session checksums: %s', session.get('checksum', [])) @@ -317,12 +320,12 @@ def get_image_json(namespace, repository, image_id, headers): abort(403) logger.debug('Looking up repo image') - v1_metadata = docker_v1_metadata(namespace_name, repo_name, image_id) + v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) if v1_metadata is None: flask_abort(404) logger.debug('Looking up repo layer size') - size = image_size(namespace_name, repo_name, image_id) + size = v1.get_image_size(namespace, repository, image_id) if size is not None: # Note: X-Docker-Size is optional and we *can* end up with a NULL image_size, # so handle this case rather than failing. 
@@ -345,7 +348,7 @@ def get_image_ancestry(namespace, repository, image_id, headers): if not permission.can() and not model.repository.repository_is_public(namespace, repository): abort(403) - ancestry_docker_ids = image_ancestry(namespace, repository, image_id) + ancestry_docker_ids = v1.image_ancestry(namespace, repository, image_id) if ancestry_docker_ids is None: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) @@ -386,36 +389,36 @@ def put_image_json(namespace, repository, image_id): logger.debug('Looking up repo image') - if not repository_exists(namespace, repository): + if not v1.repository_exists(namespace, repository): abort(404, 'Repository does not exist: %(namespace)s/%(repository)s', issue='no-repo', namespace=namespace, repository=repository) - v1_metadata = docker_v1_metadata(namespace, repository, image_id) + v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) if v1_metadata is None: username = get_authenticated_user() and get_authenticated_user().username if not username: username = get_granted_username() logger.debug('Image not found, creating or linking image with initiating user context: %s', username) - create_or_link_image(username, repository, image_id, store.preferred_locations[0]) - v1_metadata = docker_v1_metadata(namespace, repository, image_id) + v1.create_or_link_image(username, repository, image_id, store.preferred_locations[0]) + v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) # Create a temporary tag to prevent this image from getting garbage collected while the push # is in progress. - create_temp_hidden_tag(namespace_name, repo_name, app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) + v1.create_temp_hidden_tag(namespace, repository, app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) parent_id = data.get('parent', None) if parent_id: logger.debug('Looking up parent image') - if docker_v1_metadata(namespace, repository, parent_id) is None: + if v1.docker_v1_metadata(namespace, repository, parent_id) is None: abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', issue='invalid-request', image_id=image_id, parent_id=parent_id) logger.debug('Checking if image already exists') - if v1_metadata and not is_image_uploading(namespace, repository, image_id): + if v1_metadata and not v1.is_image_uploading(namespace, repository, image_id): exact_abort(409, 'Image already exists') - update_image_uploading(namespace, repository, image_id, True) + v1.update_image_uploading(namespace, repository, image_id, True) # If we reach that point, it means that this is a new image or a retry # on a failed push, save the metadata @@ -423,8 +426,7 @@ def put_image_json(namespace, repository, image_id): command = json.dumps(command_list) if command_list else None logger.debug('Setting image metadata') - update_docker_v1_metadata(namespace, repository, image_id, data.get('created'), - data.get('comment'), command, uploaded_metadata, parent_image) + v1.update_docker_v1_metadata(namespace, repository, image_id, data.get('created'), + data.get('comment'), command, uploaded_metadata, parent_id) return make_response('true', 200) - diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index da730748d..b7830215e 100644 --- a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -3,11 +3,13 @@ import json from flask import abort, request, jsonify, make_response, session + from util.names import TAG_ERROR, TAG_REGEX from auth.auth import process_auth from auth.permissions import (ReadRepositoryPermission, 
ModifyRepositoryPermission) from data import model +from data.model import v1 from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v1 import v1_bp @@ -25,7 +27,7 @@ def get_tags(namespace_name, repo_name): permission = ReadRepositoryPermission(namespace_name, repo_name) if permission.can() or model.repository.repository_is_public(namespace_name, repo_name): - tags = model.tag.list_repository_tags(namespace_name, repo_name) + tags = v1.list_tags(namespace_name, repo_name) tag_map = {tag.name: tag.image.docker_image_id for tag in tags} return jsonify(tag_map) @@ -40,12 +42,11 @@ def get_tag(namespace_name, repo_name, tag): permission = ReadRepositoryPermission(namespace_name, repo_name) if permission.can() or model.repository.repository_is_public(namespace_name, repo_name): - try: - tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag) - except model.DataModelException: + image_id = v1.find_image_id_by_tag(namespace_name, repo_name, tag) + if image_id is None: abort(404) - resp = make_response('"%s"' % tag_image.docker_image_id) + resp = make_response('"%s"' % image_id) resp.headers['Content-Type'] = 'application/json' return resp @@ -63,14 +64,14 @@ def put_tag(namespace_name, repo_name, tag): if not TAG_REGEX.match(tag): abort(400, TAG_ERROR) - docker_image_id = json.loads(request.data) - model.tag.create_or_update_tag(namespace_name, repo_name, tag, docker_image_id) + image_id = json.loads(request.data) + v1.create_or_update_tag(namespace_name, repo_name, image_id, tag) # Store the updated tag. if 'pushed_tags' not in session: session['pushed_tags'] = {} - session['pushed_tags'][tag] = docker_image_id + session['pushed_tags'][tag] = image_id return make_response('Created', 200) @@ -85,7 +86,7 @@ def delete_tag(namespace_name, repo_name, tag): permission = ModifyRepositoryPermission(namespace_name, repo_name) if permission.can(): - model.tag.delete_tag(namespace_name, repo_name, tag) + v1.delete_tag(namespace_name, repo_name, tag) track_and_log('delete_tag', model.repository.get_repository(namespace_name, repo_name), tag=tag) return make_response('Deleted', 200) From 94d71f21662c9481a830adbdce22176c7bcb6267 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 12 Jul 2016 15:09:31 -0400 Subject: [PATCH 06/34] Fix model to actually initialize --- data/database.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data/database.py b/data/database.py index 24598f29c..35dd3e66a 100644 --- a/data/database.py +++ b/data/database.py @@ -1006,7 +1006,7 @@ class BlobPlacementLocation(BaseModel): class BlobPlacementLocationPreference(BaseModel): """ BlobPlacementLocationPreference is a location to which a user's data will be replicated. 
""" - user = QuayUserField(index=True, allow_robots=False) + user = QuayUserField(index=True, allows_robots=False) location = ForeignKeyField(BlobPlacementLocation) @@ -1124,9 +1124,9 @@ class DerivedImage(BaseModel): source_manifest = ForeignKeyField(Manifest) derived_manifest_json = JSONField() media_type = ForeignKeyField(MediaType) - blob = ForeignKeyField(Blob) + blob = ForeignKeyField(Blob, related_name='blob') uniqueness_hash = CharField(index=True, unique=True) - signature_blob = ForeignKeyField(Blob, null=True) + signature_blob = ForeignKeyField(Blob, null=True, related_name='signature_blob') class Meta: database = db @@ -1154,7 +1154,7 @@ class Tag(BaseModel): (('repository', 'name'), False), # This unique index prevents deadlocks when concurrently moving and deleting tags - (('repository', 'name', 'lifetime_end_ts'), True), + (('repository', 'name', 'lifetime_end'), True), ) From ea18790dfedc33a25f1c42c1c307d29287800a94 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 12 Jul 2016 16:09:13 -0400 Subject: [PATCH 07/34] Get V1 registry code working with new model methods --- data/model/image.py | 11 +++ data/model/v1.py | 162 ++++++++++++++++++++++++++------------- endpoints/v1/registry.py | 12 +-- 3 files changed, 124 insertions(+), 61 deletions(-) diff --git a/data/model/image.py b/data/model/image.py index 031f4a660..b636c64fe 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -99,6 +99,17 @@ def get_repo_image_extended(namespace_name, repository_name, docker_image_id): return images[0] +def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id): + def limit_to_image_id(query): + return query.where(Image.docker_image_id == docker_image_id) + + images = _get_repository_images_and_storages(namespace_name, repository_name, limit_to_image_id) + if not images: + return None + + return images[0] + + def _get_repository_images_and_storages(namespace_name, repository_name, query_modifier): query = (Image .select(Image, ImageStorage) diff --git a/data/model/v1.py b/data/model/v1.py index fd934b3df..3dfa1e123 100644 --- a/data/model/v1.py +++ b/data/model/v1.py @@ -1,26 +1,36 @@ from app import app, storage as store from data import model +from data.model import db_transaction from util.morecollections import AttrDict - -# TODO(jzelinskie): implement all of these methods using both legacy and new models. - def placement_locations_docker_v1(namespace_name, repo_name, image_id): - repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) - if repo_image is None: + """ Returns all the placements for the image with the given V1 Docker ID, found under the + given repository or None if no image was found. + """ + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: return None + return repo_image.storage.locations def placement_locations_and_path_docker_v1(namespace_name, repo_name, image_id): + """ Returns a tuple of the placements and storage path location for the image with the + given V1 Docker ID, found under the given repository or None if no image was found. 
+ """ repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) - if not repo_image: + if not repo_image or repo_image.storage is None: return None, None - return model.storage.get_layer_path(repo_image.storage), repo_image.storage.locations + + return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage) def docker_v1_metadata(namespace_name, repo_name, image_id): - if not repo_image: + """ Returns various pieces of metadata associated with an image with the given V1 Docker ID, + including the checksum and its V1 JSON metadata. + """ + repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) + if repo_image is None: return None return AttrDict({ @@ -34,113 +44,155 @@ def docker_v1_metadata(namespace_name, repo_name, image_id): def update_docker_v1_metadata(namespace_name, repo_name, image_id, created_date_str, comment, command, compat_json, parent_image_id=None): - # Old implementation: - # parent_image = get_repo_extended(namespace_name, repo_name, parent_image_id) - # model.image.set_image_metadata(image_id, namespace_name, repo_name, create_date_str, comment, command, compat_json, parent_image) - pass + """ Updates various pieces of V1 metadata associated with a particular image. """ + parent_image = None + if parent_image_id is not None: + parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_image_id) + + model.image.set_image_metadata(image_id, namespace_name, repo_name, created_date_str, comment, + command, compat_json, parent=parent_image) def storage_exists(namespace_name, repo_name, image_id): - repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) - try: - layer_path = store.v1_image_layer_path(repo_image.storage.uuid) - except AttributeError: + """ Returns whether storage already exists for the image with the V1 Docker ID under the + given repository. + """ + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: return False - if (store.exists(repo_image.storage.locations, layer_path) and not - repo_image.storage.uploading): - return True - return False + if repo_image.storage.uploading: + return False + + layer_path = model.storage.get_layer_path(repo_image.storage) + return store.exists(repo_image.storage.locations, layer_path) -def store_docker_v1_checksum(namespace_name, repo_name, image_id, checksum, content_checksum): - ## Old implementation: - # UPDATE repo_image.storage.content_checksum = content_checksum - # UPDATE repo_image.v1_checksum = checksum - pass +def store_docker_v1_checksums(namespace_name, repo_name, image_id, checksum, content_checksum): + """ Stores the various V1 checksums for the image with the V1 Docker ID. """ + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + + with db_transaction(): + repo_image.storage.content_checksum = content_checksum + repo_image.v1_checksum = checksum + + repo_image.storage.save() + repo_image.save() def is_image_uploading(namespace_name, repo_name, image_id): - repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) - if repo_image is None: + """ Returns whether the image with the V1 Docker ID is currently marked as uploading. 
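+      Images without backing storage are reported as not uploading.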
""" + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: return False + return repo_image.storage.uploading def update_image_uploading(namespace_name, repo_name, image_id, is_uploading): - ## Old implementation: - # UPDATE repo_image.storage.uploading = is_uploading - pass + """ Marks the image with the V1 Docker ID with the given uploading status. """ + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + + repo_image.storage.uploading = is_uploading + repo_image.storage.save() + return repo_image.storage def update_image_sizes(namespace_name, repo_name, image_id, size, uncompressed_size): - model.storage.set_image_storage_metadata( - image_id, - namespace_name, - repo_name, - size, - uncompressed_size, - ) + """ Updates the sizing information for the image with the given V1 Docker ID. """ + model.storage.set_image_storage_metadata(image_id, namespace_name, repo_name, size, + uncompressed_size) def get_image_size(namespace_name, repo_name, image_id): + """ Returns the wire size of the image with the given Docker V1 ID. """ + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return None + return repo_image.storage.image_size def create_bittorrent_pieces(namespace_name, repo_name, image_id, pieces_bytes): - repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) - try: - model.storage.save_torrent_info( - repo_image.storage, - app.config['BITTORRENT_PIECE_SIZE'], - pieces_bytes - ) - except AttributeError: - pass + """ Saves the bittorrent piece hashes for the image with the given Docker V1 ID. """ + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + + model.storage.save_torrent_info(repo_image.storage, app.config['BITTORRENT_PIECE_SIZE'], + pieces_bytes) def image_ancestry(namespace_name, repo_name, image_id): + """ Returns a list containing the full ancestry of Docker V1 IDs, in order, for the image with + the givne Docker V1 ID. + """ try: - image = model.image.get_image_by_id(namespace, repository, image_id) + image = model.image.get_image_by_id(namespace_name, repo_name, image_id) except model.InvalidImageException: return None - parents = model.image.get_parent_images(namespace, repository, image) + parents = model.image.get_parent_images(namespace_name, repo_name, image) ancestry_docker_ids = [image.docker_image_id] ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) + return ancestry_docker_ids def repository_exists(namespace_name, repo_name): + """ Returns whether the repository with the given name and namespace exists. """ repo = model.repository.get_repository(namespace_name, repo_name) return repo is not None -def create_or_link_image(username, repo_name, image_id, storage_location): - pass +def create_or_link_image(username, namespace_name, repo_name, image_id, storage_location): + """ Adds the given image to the given repository, by either linking to an existing image + visible to the user with the given username, or creating a new one if no existing image + matches. 
+ """ + repo = model.repository.get_repository(namespace_name, repo_name) + model.image.find_create_or_link_image(image_id, repo, username, {}, storage_location) -def create_temp_hidden_tag(namespace_name, repo_name, expiration): - # was this code: - # model.tag.create_temporary_hidden_tag(repo, repo_image, - # app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) - pass +def create_temp_hidden_tag(namespace_name, repo_name, image_id, expiration): + """ Creates a hidden tag under the matching namespace pointing to the image with the given V1 + Docker ID. + """ + repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) + if repo_image is None: + return + + repo = repo_image.repository + model.tag.create_temporary_hidden_tag(repo, repo_image, expiration) def list_tags(namespace_name, repo_name): + """ Returns all the tags defined in the repository with the given namespace and name. """ return model.tag.list_repository_tags(namespace_name, repo_name) def create_or_update_tag(namespace_name, repo_name, image_id, tag_name): + """ Creates or updates a tag under the matching repository to point to the image with the given + Docker V1 ID. + """ model.tag.create_or_update_tag(namespace_name, repo_name, tag_name, image_id) def find_image_id_by_tag(namespace_name, repo_name, tag_name): + """ Returns the Docker V1 image ID for the HEAD image for the tag with the given name under + the matching repository, or None if none. + """ try: tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name) except model.DataModelException: return None + return tag_image.docker_image_id def delete_tag(namespace_name, repo_name, tag_name): + """ Deletes the given tag from the given repository. """ model.tag.delete_tag(namespace_name, repo_name, tag_name) diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 99c2e8f5e..6132c44b8 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -30,11 +30,10 @@ logger = logging.getLogger(__name__) def _finish_image(namespace, repository, image_id): # Checksum is ok, we remove the marker - v1.update_image_uploading(namespace, repository, image_id, False) + blob_ref = v1.update_image_uploading(namespace, repository, image_id, False) # Send a job to the work queue to replicate the image layer. - # TODO(jzelinskie): make this not use imagestorage - queue_storage_replication(namespace, repo_image.storage) + queue_storage_replication(namespace, blob_ref) def require_completion(f): @@ -292,7 +291,7 @@ def put_image_checksum(namespace, repository, image_id): if len(checksum_parts) != 2: abort(400, 'Invalid checksum format') - v1.store_docker_v1_checksum(namespace, repository, image_id, checksum, content_checksum) + v1.store_docker_v1_checksums(namespace, repository, image_id, checksum, content_checksum) if checksum not in session.get('checksum', []): logger.debug('session checksums: %s', session.get('checksum', [])) @@ -400,12 +399,13 @@ def put_image_json(namespace, repository, image_id): username = get_granted_username() logger.debug('Image not found, creating or linking image with initiating user context: %s', username) - v1.create_or_link_image(username, repository, image_id, store.preferred_locations[0]) + v1.create_or_link_image(username, namespace, repository, image_id, store.preferred_locations[0]) v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) # Create a temporary tag to prevent this image from getting garbage collected while the push # is in progress. 
-  v1.create_temp_hidden_tag(namespace, repository, app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'])
+  v1.create_temp_hidden_tag(namespace, repository, image_id,
+                            app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'])

   parent_id = data.get('parent', None)
   if parent_id:

From a5502b54f8c914bf4abbc87cd567c4063648bb69 Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Mon, 25 Jul 2016 18:55:10 -0400
Subject: [PATCH 08/34] trackhelper: use data.types.Repository

---
 endpoints/trackhelper.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/endpoints/trackhelper.py b/endpoints/trackhelper.py
index 1905ac86d..f1bc708e5 100644
--- a/endpoints/trackhelper.py
+++ b/endpoints/trackhelper.py
@@ -3,21 +3,22 @@ import random

 from urlparse import urlparse

-from app import analytics, app, userevents
-from data import model
 from flask import request
+
+from app import analytics, userevents
+from data import model
 from auth.registry_jwt_auth import get_granted_entity
 from auth.auth_context import (get_authenticated_user, get_validated_token,
                                get_validated_oauth_token)

 logger = logging.getLogger(__name__)

-def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **kwargs):
-  repository = repo.name
-  namespace = repo.namespace_user.username
+def track_and_log(event_name, repo_obj, analytics_name=None, analytics_sample=1, **kwargs):
+  repo_name = repo_obj.name
+  namespace_name = repo_obj.namespace_name
   metadata = {
-    'repo': repository,
-    'namespace': namespace,
+    'repo': repo_name,
+    'namespace': namespace_name,
   }

   metadata.update(kwargs)

@@ -57,7 +58,7 @@ def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **k
   request_parsed = urlparse(request.url_root)

   extra_params = {
-    'repository': '%s/%s' % (namespace, repository),
+    'repository': '%s/%s' % (namespace_name, repo_name),
     'user-agent': request.user_agent.string,
     'hostname': request_parsed.hostname,
   }

@@ -68,8 +69,8 @@ def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **k
     logger.debug('Publishing %s to the user events system', event_name)
     user_event_data = {
       'action': event_name,
-      'repository': repository,
-      'namespace': namespace
+      'repository': repo_name,
+      'namespace': namespace_name,
     }

     event = userevents.get_event(authenticated_user.username)

@@ -84,7 +85,6 @@ def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **k
   # Log the action to the database.
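
# A usage sketch for the refactored signature above: any object exposing name
# and namespace_name attributes works, such as the data.types.Repository named
# tuple this series introduces (the values below are illustrative only).
from data.types import Repository

repo_obj = Repository(id=1, name='myrepo', namespace_name='myorg')
track_and_log('pull_repo', repo_obj, analytics_name='pull_repo_100x',
              analytics_sample=0.01, tag='latest')
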
logger.debug('Logging the %s to logs system', event_name) - model.log.log_action(event_name, namespace, performer=authenticated_user, ip=request.remote_addr, - metadata=metadata, repository=repo) - + model.log.log_action(event_name, namespace_name, performer=authenticated_user, + ip=request.remote_addr, metadata=metadata, repository=repo_obj) logger.debug('Track and log of %s complete', event_name) From 5b630ebdb0c720291c9af2d08cb5afe6f1341346 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Mon, 25 Jul 2016 18:56:25 -0400 Subject: [PATCH 09/34] v2/manifest: refactor to use types --- data/model/v2.py | 102 ++++++++ data/types.py | 346 ++++++++++++++++++++++++++ endpoints/v2/manifest.py | 513 ++++++++------------------------------- 3 files changed, 553 insertions(+), 408 deletions(-) create mode 100644 data/model/v2.py create mode 100644 data/types.py diff --git a/data/model/v2.py b/data/model/v2.py new file mode 100644 index 000000000..b677b462e --- /dev/null +++ b/data/model/v2.py @@ -0,0 +1,102 @@ +from data.types import ( + Repository, + Tag, + ManifestJSON, + DockerV1Metadata, +) + +def get_repository(namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + ) + + +def get_active_tag(namespace_name, repo_name, tag_name): + try: + return model.tag.get_active_tag(namespace_name, repo_name, tag_name) + except RepositoryTag.DoesNotExist: + return None + + +def get_manifest_by_tag(namespace_name, repo_name, tag_name): + try: + manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref) + return ManifestJSON(digest=digest, json=manifest.json_data) + except model.InvalidManifestException: + return None + + +def get_manifest_by_digest(namespace_name, repo_name, digest): + try: + manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) + return ManifestJSON(digest=digest, json=manifest.json_data) + except model.InvalidManifestException: + return None + + +def get_tag_by_manifest_digest(namespace_name, repo_name, digest): + return Tag() + + +def delete_tag(namespace_name, repo_name, tag_name): + model.tag.delete_tag(namespace_name, repo_name, tag.name) + return True + + +def docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): + if not repo_image: + return None + + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=image_id, + checksum=repo_image.v1_checksum, + content_checksum=repo_image.content_checksum, + compat_json=repo_image.v1_json_metadata, + ) + + +def docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids): + images_query = model.image.lookup_repository_images(repo, all_image_ids) + return [DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=image.docker_image_id, + checksum=image.v1_checksum, + content_checksum=image.content_checksum, + compat_json=image.v1_json_metadata, + ) for image in images_query] + + +def get_parents_docker_v1_metadata(namespace_name, repo_name, image_id): + # Old implementation: + # parents = model.image.get_parent_images(namespace_name, repo_name, image) + + # desired: + # return a list of the AttrDict in docker_v1_metadata + return [] + + +def create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest_digest, manifest_bytes): + try: + model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, + 
manifest.digest, manifest.bytes) + except IntegrityError: + # It's already there! + pass + + +def synthesize_v1_image(repo, storage, image_id, created, comment, command, compat_json, parent_image_id): + model.image.synthesize_v1_image(repo, storage, image_id, created, comment, command, compat_json, parent_image_id) + + +def save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, manifest_bytes): + model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, + manifest_bytes) diff --git a/data/types.py b/data/types.py new file mode 100644 index 000000000..f734f3506 --- /dev/null +++ b/data/types.py @@ -0,0 +1,346 @@ +import json +import hashlib +import logging + +from collections import namedtuple, OrderedDict +from datetime import datetime + +from jwkest.jws import SIGNER_ALGS, keyrep +from jwt.utils import base64url_encode, base64url_decode + +from digest import digest_tools + + +logger = logging.getLogger(__name__) + + +DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws' +DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json' +DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json' + +DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE] + + +# These are used to extract backwards compatiblity data from Docker Manifest Schema 1 +ExtractedLayerMetadata = namedtuple( + 'ExtractedLayerMetadata', + ['digest', 'v1_metadata', 'v1_metadata_str'] +) +ExtractedDockerV1Metadata = namedtuple( + 'ExtractedDockerV1Metadata', + ['image_id', 'parent_image_id', 'created', 'comment', 'command'] +) + + +# Constants used for Docker Manifest Schema 2.1 +_DOCKER_SCHEMA_1_SIGNATURES_KEY = 'signatures' +_DOCKER_SCHEMA_1_PROTECTED_KEY = 'protected' +_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY = 'formatLength' +_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY = 'formatTail' +_DOCKER_SCHEMA_1_REPO_NAME_KEY = 'name' +_DOCKER_SCHEMA_1_REPO_TAG_KEY = 'tag' +_DOCKER_SCHEMA_1_FS_LAYERS_KEY = 'fsLayers' +_DOCKER_SCHEMA_1_HISTORY_KEY = 'history' +_DOCKER_SCHEMA_1_BLOB_SUM_KEY = 'blobSum' +_DOCKER_SCHEMA_1_V1_COMPAT_KEY = 'v1Compatibility' +_DOCKER_SCHEMA_1_ARCH_KEY = 'architecture' +_DOCKER_SCHEMA_1_SCHEMA_VER_KEY = 'schemaVersion' +_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ' +_JWS_ALGORITHM = 'RS256' + + +class ManifestException(Exception): + pass + + +class ManifestMalformed(ManifestException): + pass + + +class ManifestSignatureFailure(ManifestException): + pass + + +def _updated_v1_metadata(v1_metadata_json, updated_id_map): + parsed = json.loads(v1_metadata_json) + parsed['id'] = updated_id_map[parsed['id']] + + if parsed.get('parent') and parsed['parent'] in updated_id_map: + parsed['parent'] = updated_id_map[parsed['parent']] + + if parsed.get('container_config', {}).get('Image'): + existing_image = parsed['container_config']['Image'] + if existing_image in updated_id_map: + parsed['container_config']['image'] = updated_id_map[existing_image] + + return json.dumps(parsed) + + +class DockerSchema1Manifest(object): + def __init__(self, manifest_bytes, validate=True): + self._layers = None + self._bytes = manifest_bytes + + self._parsed = json.loads(manifest_bytes) + self._signatures = self._parsed[_DOCKER_SCHEMA_1_SIGNATURES_KEY] + self._tag = self._parsed[_DOCKER_SCHEMA_1_REPO_TAG_KEY] + + repo_name_tuple = self._parsed[_DOCKER_SCHEMA_1_REPO_NAME_KEY].split('/') + if 
len(repo_name_tuple) > 1: + self._namespace, self._repo_name = repo_name_tuple + elif len(repo_name_tuple) == 1: + self._namespace = '' + self._repo_name = repo_name_tuple[0] + else: + raise ManifestMalformed('malformed repository name') + + if validate: + self._validate() + + def _validate(self): + for signature in self._signatures: + bytes_to_verify = '{0}.{1}'.format(signature['protected'], + base64url_encode(self.payload)) + signer = SIGNER_ALGS[signature['header']['alg']] + key = keyrep(signature['header']['jwk']) + gk = key.get_key() + sig = base64url_decode(signature['signature'].encode('utf-8')) + verified = signer.verify(bytes_to_verify, sig, gk) + if not verified: + raise ManifestSignatureFailure() + + @property + def signatures(self): + return self._signatures + + @property + def namespace(self): + return self._namespace + + @property + def repo_name(self): + return self._repo_name + + @property + def tag(self): + return self._tag + + @property + def bytes(self): + return self._bytes + + @property + def manifest_json(self): + return self._parsed + + @property + def digest(self): + return digest_tools.sha256_digest(self.payload) + + @property + def image_ids(self): + return {mdata.v1_metadata.image_id for mdata in self.layers} + + @property + def parent_image_ids(self): + return {mdata.v1_metadata.parent_image_id for mdata in self.layers + if mdata.v1_metadata.parent_image_id} + + @property + def checksums(self): + return list({str(mdata.digest) for mdata in self.layers}) + + @property + def layers(self): + if self._layers is None: + self._layers = list(self._generate_layers()) + return self._layers + + def _generate_layers(self): + """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, + starting from the base image and working toward the leaf node. + """ + for blob_sum_obj, history_obj in reversed(zip(self._parsed[_DOCKER_SCHEMA_1_FS_LAYERS_KEY], + self._parsed[_DOCKER_SCHEMA_1_HISTORY_KEY])): + + try: + image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + except digest_tools.InvalidDigestException: + raise ManifestMalformed('could not parse manifest digest: %s' % + blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + + metadata_string = history_obj[_DOCKER_SCHEMA_1_V1_COMPAT_KEY] + + v1_metadata = json.loads(metadata_string) + command_list = v1_metadata.get('container_config', {}).get('Cmd', None) + command = json.dumps(command_list) if command_list else None + + if not 'id' in v1_metadata: + raise ManifestMalformed('invalid manifest v1 history') + + extracted = ExtractedDockerV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), + v1_metadata.get('created'), v1_metadata.get('comment'), + command) + yield ExtractedLayerMetadata(image_digest, extracted, metadata_string) + + @property + def payload(self): + protected = str(self._signatures[0][_DOCKER_SCHEMA_1_PROTECTED_KEY]) + parsed_protected = json.loads(base64url_decode(protected)) + signed_content_head = self._bytes[:parsed_protected[_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY]] + signed_content_tail = base64url_decode(str(parsed_protected[_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY])) + return signed_content_head + signed_content_tail + + def rewrite_invalid_image_ids(self, images_map): + """ + Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. 
+ + If Docker gives us a layer with a v1 image ID that already points to existing + content, but the checksums don't match, then we need to rewrite the image ID + to something new in order to ensure consistency. + """ + # used to synthesize a new "content addressable" image id + digest_history = hashlib.sha256() + + has_rewritten_ids = False + updated_id_map = {} + for extracted_layer_metadata in self.layers: + digest_str = str(extracted_layer_metadata.digest) + extracted_v1_metadata = extracted_layer_metadata.v1_metadata + working_image_id = extracted_v1_metadata.image_id + + # Update our digest_history hash for the new layer data. + digest_history.update(digest_str) + digest_history.update("@") + digest_history.update(extracted_layer_metadata.v1_metadata_str.encode('utf-8')) + digest_history.update("|") + + # Ensure that the v1 image's storage matches the V2 blob. If not, we've + # found a data inconsistency and need to create a new layer ID for the V1 + # image, and all images that follow it in the ancestry chain. + digest_mismatch = (extracted_v1_metadata.image_id in images_map and + images_map[extracted_v1_metadata.image_id].content_checksum != digest_str) + if digest_mismatch or has_rewritten_ids: + working_image_id = digest_history.hexdigest() + has_rewritten_ids = True + + # Store the new docker id in the map + updated_id_map[extracted_v1_metadata.image_id] = working_image_id + + # Lookup the parent image for the layer, if any. + parent_image_id = None + if extracted_v1_metadata.parent_image_id is not None: + parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None) + if parent_image_id is None: + raise ManifestMalformed( + 'Parent not found with image ID: {0}'.format(extracted_v1_metadata.parent_image_id) + ) + + # Synthesize and store the v1 metadata in the db. + v1_metadata_json = extracted_layer_metadata.v1_metadata_str + if has_rewritten_ids: + v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map) + + yield DockerV1Metadata( + image_id=working_image_id, + created=extracted_v1_metadata.created, + comment=extracted_v1_metadata.comment, + command=extracted_v1_metadata.command, + compat_json=v1_metadata_json, + parent_image_id=parent_image_id, + ) + + +class DockerSchema1ManifestBuilder(object): + """ Class which represents a manifest which is currently being built. """ + def __init__(self, namespace_name, repo_name, tag, architecture='amd64'): + repo_name_key = '{0}/{1}'.format(namespace_name, repo_name) + if namespace_name == '': + repo_name_key = repo_name + + self._base_payload = { + _DOCKER_SCHEMA_1_REPO_TAG_KEY: tag, + _DOCKER_SCHEMA_1_REPO_NAME_KEY: repo_name_key, + _DOCKER_SCHEMA_1_ARCH_KEY: architecture, + _DOCKER_SCHEMA_1_SCHEMA_VER_KEY: 1, + } + + self._fs_layer_digests = [] + self._history = [] + + def add_layer(self, layer_digest, v1_json_metadata): + self._fs_layer_digests.append({ + _DOCKER_SCHEMA_1_BLOB_SUM_KEY: layer_digest, + }) + self._history.append({ + _DOCKER_SCHEMA_1_V1_COMPAT_KEY: v1_json_metadata, + }) + return self + + + def build(self, json_web_key): + """ Build the payload and sign it, returning a SignedManifest object. 
+ """ + payload = OrderedDict(self._base_payload) + payload.update({ + _DOCKER_SCHEMA_1_HISTORY_KEY: self._history, + _DOCKER_SCHEMA_1_FS_LAYERS_KEY: self._fs_layer_digests, + }) + + payload_str = json.dumps(payload, indent=3) + + split_point = payload_str.rfind('\n}') + + protected_payload = { + 'formatTail': base64url_encode(payload_str[split_point:]), + 'formatLength': split_point, + 'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU), + } + protected = base64url_encode(json.dumps(protected_payload)) + logger.debug('Generated protected block: %s', protected) + + bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str)) + + signer = SIGNER_ALGS[_JWS_ALGORITHM] + signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) + logger.debug('Generated signature: %s', signature) + + public_members = set(json_web_key.public_members) + public_key = {comp: value for comp, value in json_web_key.to_dict().items() + if comp in public_members} + + signature_block = { + 'header': { + 'jwk': public_key, + 'alg': _JWS_ALGORITHM, + }, + 'signature': signature, + _DOCKER_SCHEMA_1_PROTECTED_KEY: protected, + } + + logger.debug('Encoded signature block: %s', json.dumps(signature_block)) + + payload.update({ + _DOCKER_SCHEMA_1_SIGNATURES_KEY: [signature_block], + }) + + return DockerSchema1Manifest(json.dumps(payload, indent=3)) + + +Repository = namedtuple('Repository', ['id', 'name', 'namespace_name']) + +Tag = namedtuple('Tag', ['name', 'repository']) + +ManifestJSON = namedtuple('ManifestJSON', ['digest', 'json']) + +DockerV1Metadata = namedtuple('DockerV1Metadata', ['namespace_name', + 'repo_name', + 'image_id', + 'checksum', + 'content_checksum', + 'created', + 'comment', + 'command', + 'parent_image_id', + 'compat_json']) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index b1302d1b8..cb92b1ebe 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -1,34 +1,31 @@ import logging -import json -import hashlib -from collections import namedtuple, OrderedDict -from datetime import datetime from functools import wraps -import jwt.utils - -from peewee import IntegrityError from flask import make_response, request, url_for -from jwkest.jws import SIGNER_ALGS, keyrep import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth +from data import model +from data.types import ( + DockerSchema1Manifest, + DockerSchema1ManifestBuilder, + ManifestException, + DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_CONTENT_TYPES, +) +from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v2 import v2_bp, require_repo_read, require_repo_write from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown, TagInvalid, - NameInvalid, TagAlreadyExists) + NameInvalid) from endpoints.trackhelper import track_and_log from endpoints.notificationhelper import spawn_notification from util.registry.replication import queue_storage_replication from util.names import VALID_TAG_PATTERN -from digest import digest_tools -from data import model -from data.model import TagAlreadyCreatedException -from data.database import RepositoryTag logger = logging.getLogger(__name__) @@ -37,234 +34,29 @@ BASE_MANIFEST_ROUTE = '//manifests/ 1: - self._namespace, self._repo_name = repo_name_tuple - elif len(repo_name_tuple) == 1: - self._namespace = '' - self._repo_name = repo_name_tuple[0] - 
else: - raise ValueError('repo_name has too many or too few pieces') - - if validate: - self._validate() - - def _validate(self): - for signature in self._signatures: - bytes_to_verify = '{0}.{1}'.format(signature['protected'], - jwt.utils.base64url_encode(self.payload)) - signer = SIGNER_ALGS[signature['header']['alg']] - key = keyrep(signature['header']['jwk']) - gk = key.get_key() - sig = jwt.utils.base64url_decode(signature['signature'].encode('utf-8')) - verified = signer.verify(bytes_to_verify, sig, gk) - if not verified: - raise ValueError('manifest file failed signature verification') - - @property - def signatures(self): - return self._signatures - - @property - def namespace(self): - return self._namespace - - @property - def repo_name(self): - return self._repo_name - - @property - def tag(self): - return self._tag - - @property - def bytes(self): - return self._bytes - - @property - def digest(self): - return digest_tools.sha256_digest(self.payload) - - @property - def layers(self): - """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, - starting from the base image and working toward the leaf node. - """ - for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY], - self._parsed[_HISTORY_KEY])): - - try: - image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY]) - except digest_tools.InvalidDigestException: - err_message = 'could not parse manifest digest: %s' % blob_sum_obj[_BLOB_SUM_KEY] - raise ManifestInvalid(detail={'message': err_message}) - - metadata_string = history_obj[_V1_COMPAT_KEY] - - v1_metadata = json.loads(metadata_string) - command_list = v1_metadata.get('container_config', {}).get('Cmd', None) - command = json.dumps(command_list) if command_list else None - - if not 'id' in v1_metadata: - raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'}) - - labels = v1_metadata.get('config', {}).get('Labels', {}) or {} - extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), - v1_metadata.get('created'), v1_metadata.get('comment'), - command, labels) - yield ImageMetadata(image_digest, extracted, metadata_string) - - @property - def payload(self): - protected = str(self._signatures[0][_PROTECTED_KEY]) - parsed_protected = json.loads(jwt.utils.base64url_decode(protected)) - signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]] - signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY])) - return signed_content_head + signed_content_tail - - -class SignedManifestBuilder(object): - """ Class which represents a manifest which is currently being built. - """ - def __init__(self, namespace_name, repo_name, tag, architecture='amd64', schema_ver=1): - repo_name_key = '{0}/{1}'.format(namespace_name, repo_name) - if namespace_name == '': - repo_name_key = repo_name - - self._base_payload = { - _REPO_TAG_KEY: tag, - _REPO_NAME_KEY: repo_name_key, - _ARCH_KEY: architecture, - _SCHEMA_VER: schema_ver, - } - - self._fs_layer_digests = [] - self._history = [] - - def add_layer(self, layer_digest, v1_json_metadata): - self._fs_layer_digests.append({ - _BLOB_SUM_KEY: layer_digest, - }) - self._history.append({ - _V1_COMPAT_KEY: v1_json_metadata, - }) - return self - - - def build(self, json_web_key): - """ Build the payload and sign it, returning a SignedManifest object. 
- """ - payload = OrderedDict(self._base_payload) - payload.update({ - _HISTORY_KEY: self._history, - _FS_LAYERS_KEY: self._fs_layer_digests, - }) - - payload_str = json.dumps(payload, indent=3) - - split_point = payload_str.rfind('\n}') - - protected_payload = { - 'formatTail': jwt.utils.base64url_encode(payload_str[split_point:]), - 'formatLength': split_point, - 'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU), - } - protected = jwt.utils.base64url_encode(json.dumps(protected_payload)) - logger.debug('Generated protected block: %s', protected) - - bytes_to_sign = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(payload_str)) - - signer = SIGNER_ALGS[JWS_ALGORITHM] - signature = jwt.utils.base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) - logger.debug('Generated signature: %s', signature) - - public_members = set(json_web_key.public_members) - public_key = {comp: value for comp, value in json_web_key.to_dict().items() - if comp in public_members} - - signature_block = { - 'header': { - 'jwk': public_key, - 'alg': JWS_ALGORITHM, - }, - 'signature': signature, - _PROTECTED_KEY: protected, - } - - logger.debug('Encoded signature block: %s', json.dumps(signature_block)) - - payload.update({ - _SIGNATURES_KEY: [signature_block], - }) - - return SignedManifest(json.dumps(payload, indent=3)) - - @v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET']) @parse_repository_name() @process_registry_jwt_auth(scopes=['pull']) @require_repo_read @anon_protect -def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): - try: - manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: - try: - model.tag.get_active_tag(namespace_name, repo_name, manifest_ref) - except RepositoryTag.DoesNotExist: +def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name): + manifest = v2.get_manifest_by_tag(namespace_name, repo_name, tag_name) + if manifest is None: + tag = v2.get_active_tag(namespace_name, repo_name, tag_name) + if tag is None: raise ManifestUnknown() - try: - manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref) - except model.DataModelException: - logger.exception('Exception when generating manifest for %s/%s:%s', namespace_name, repo_name, - manifest_ref) + manifest = _generate_and_store_manifest(namespace_name, repo_name, tag_name) + if manifest is None: raise ManifestUnknown() - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v2.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', repo, analytics_name='pull_repo_100x', analytics_sample=0.01) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.json_data, 200) - response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE + response = make_response(manifest.bytes, 200) + response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE response.headers['Docker-Content-Digest'] = manifest.digest return response @@ -275,19 +67,18 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @require_repo_read @anon_protect def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): - try: - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: + manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) + if manifest is None: # Without a tag name to reference, 
we can't make an attempt to generate the manifest raise ManifestUnknown() - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v2.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', repo) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.json_data, 200) - response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE + response = make_response(manifest.json, 200) + response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE response.headers['Docker-Content-Digest'] = manifest.digest return response @@ -295,7 +86,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): def _reject_manifest2_schema2(func): @wraps(func) def wrapped(*args, **kwargs): - if request.content_type in MANIFEST2_SCHEMA2_CONTENT_TYPES: + if request.content_type in DOCKER_SCHEMA2_CONTENT_TYPES: raise ManifestInvalid(detail={'message': 'manifest schema version not supported'}, http_status_code=415) return func(*args, **kwargs) @@ -308,13 +99,13 @@ def _reject_manifest2_schema2(func): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): +def write_manifest_by_tagname(namespace_name, repo_name, tag_name): try: - manifest = SignedManifest(request.data) - except ValueError: - raise ManifestInvalid(detail={'message': 'could not parse manifest'}) + manifest = DockerSchema1Manifest(request.data) + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) - if manifest.tag != manifest_ref: + if manifest.tag != tag_name: raise TagInvalid() return _write_manifest(namespace_name, repo_name, manifest) @@ -326,39 +117,22 @@ def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def write_manifest_by_digest(namespace_name, repo_name, manifest_ref): +def write_manifest_by_digest(namespace_name, repo_name, digest): try: - manifest = SignedManifest(request.data) - except ValueError: - raise ManifestInvalid(detail={'message': 'could not parse manifest'}) + manifest = DockerSchema1Manifest(request.data) + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) - if manifest.digest != manifest_ref: + if manifest.digest != digest: raise ManifestInvalid(detail={'message': 'manifest digest mismatch'}) return _write_manifest(namespace_name, repo_name, manifest) -def _updated_v1_metadata(v1_metadata_json, updated_id_map): - parsed = json.loads(v1_metadata_json) - parsed['id'] = updated_id_map[parsed['id']] - - if parsed.get('parent') and parsed['parent'] in updated_id_map: - parsed['parent'] = updated_id_map[parsed['parent']] - - if parsed.get('container_config', {}).get('Image'): - existing_image = parsed['container_config']['Image'] - if existing_image in updated_id_map: - parsed['container_config']['image'] = updated_id_map[existing_image] - - return json.dumps(parsed) - - -def _write_manifest_itself(namespace_name, repo_name, manifest): - # Ensure that the manifest is for this repository. If the manifest's namespace is empty, then - # it is for the library namespace and we need an extra check. 
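
# The namespace check below, restated in isolation as a sketch
# (library_namespace stands in for app.config['LIBRARY_NAMESPACE']): a schema 1
# manifest for a "library" repository carries an empty namespace, which is only
# acceptable when the push targets the configured library namespace.
def manifest_namespace_matches(manifest_namespace, url_namespace,
                               library_namespace, library_support=True):
  if manifest_namespace == '':
    return library_support and url_namespace == library_namespace
  return manifest_namespace == url_namespace
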
- if (manifest.namespace == '' and features.LIBRARY_SUPPORT and +def _write_manifest(namespace_name, repo_name, manifest): + if (manifest.namespace == '' and + features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']): - # This is a library manifest. All good. pass elif manifest.namespace != namespace_name: raise NameInvalid() @@ -367,135 +141,65 @@ def _write_manifest_itself(namespace_name, repo_name, manifest): raise NameInvalid() # Ensure that the repository exists. - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v2.get_repository(namespace_name, repo_name) if repo is None: raise NameInvalid() - # Lookup all the images and their parent images (if any) inside the manifest. This will let us - # know which V1 images we need to synthesize and which ones are invalid. - layers = list(manifest.layers) - - docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers} - parent_image_ids = {mdata.v1_metadata.parent for mdata in layers - if mdata.v1_metadata.parent} - all_image_ids = list(docker_image_ids | parent_image_ids) - - images_query = model.image.lookup_repository_images(repo, all_image_ids) - images_map = {image.docker_image_id: image for image in images_query} - - # Lookup the storages associated with each blob in the manifest. - checksums = list({str(mdata.digest) for mdata in manifest.layers}) - storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums) - storage_map = {storage.content_checksum: storage for storage in storage_query} - - # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to - # a storage with a content checksum different from the existing, then we need to rewrite - # the Docker ID to ensure consistency. - tag_name = manifest.tag - has_rewritten_ids = False - updated_id_map = {} - - # Synthesized image id hash. Can be used to pull a "content addressable" image id out of thin air. - digest_history = hashlib.sha256() - - for mdata in layers: - digest_str = str(mdata.digest) - v1_mdata = mdata.v1_metadata - working_docker_id = v1_mdata.docker_id - - # Update our digest_history hash for the new layer data. - digest_history.update(digest_str) - digest_history.update("@") - digest_history.update(mdata.v1_metadata_str.encode('utf-8')) - digest_history.update("|") - - # Ensure that all blobs exist. - blob_storage = storage_map.get(digest_str) - if blob_storage is None: - raise BlobUnknown(detail={'digest': digest_str}) - - # Ensure that the V1 image's storage matches the V2 blob. If not, we've found - # a data inconsistency and need to create a new layer ID for the V1 image, and all images - # that follow it in the ancestry chain. - if ((v1_mdata.docker_id in images_map and - images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or - has_rewritten_ids): - - working_docker_id = digest_history.hexdigest() - logger.warning('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name, - v1_mdata.docker_id, working_docker_id) - has_rewritten_ids = True - - # Store the new docker id in the map - updated_id_map[v1_mdata.docker_id] = working_docker_id - - # Lookup the parent image for the layer, if any. - parent_image = None - if v1_mdata.parent is not None: - parent_image = images_map.get(v1_mdata.parent) - if parent_image is None: - msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent) - raise ManifestInvalid(detail={'message': msg}) - - # Synthesize and store the v1 metadata in the db. 
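
# The ID-rewriting scheme in miniature, mirroring the removed loop above (and
# its replacement, data.types rewrite_invalid_image_ids): each layer folds its
# blob digest and raw v1 JSON into a running SHA-256, so a single checksum
# mismatch deterministically renames that layer and every descendant.
import hashlib

def next_rewritten_id(digest_history, blob_digest_str, v1_metadata_str):
  digest_history.update(blob_digest_str)
  digest_history.update("@")
  digest_history.update(v1_metadata_str.encode('utf-8'))
  digest_history.update("|")
  return digest_history.hexdigest()

# e.g.: next_rewritten_id(hashlib.sha256(), 'sha256:abc...', '{"id": "..."}')
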
- v1_metadata_json = mdata.v1_metadata_str - if has_rewritten_ids: - v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map) - - image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id, - v1_mdata.created, v1_mdata.comment, v1_mdata.command, - v1_metadata_json, parent_image) - images_map[v1_mdata.docker_id] = image - - if not layers: - # The manifest doesn't actually reference any layers! + if not manifest.layers: raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) - # Store the manifest pointing to the tag. - manifest_digest = manifest.digest - leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id + # Ensure all the blobs in the manifest exist. + storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums) + storage_map = {storage.content_checksum: storage for storage in storage_query} + for extracted_layer_metadata in manifest.layers: + digest_str = str(extracted_layer_metadata.digest) + if digest_str not in storage_map: + raise BlobUnknown(detail={'digest': digest_str}) + # Lookup all the images and their parent images (if any) inside the manifest. + # This will let us know which v1 images we need to synthesize and which ones are invalid. + all_image_ids = list(manifest.docker_image_ids | manifest.parent_image_ids) + images = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) + images_map = {image.image_id: image for image in images} + + # Rewrite any v1 image IDs that do not match the checksum in the database. try: - tag_manifest, manifest_created = model.tag.store_tag_manifest(namespace_name, repo_name, - tag_name, leaf_layer_id, - manifest_digest, manifest.bytes) - except TagAlreadyCreatedException: - logger.warning('Tag %s was already created under repository %s/%s pointing to image %s', - tag_name, namespace_name, repo_name, leaf_layer_id) - raise TagAlreadyExists() + rewritten_images = manifest.rewrite_invalid_image_ids(images_map) + for rewritten_image in rewritten_images: + image = v2.synthesize_v1_image( + repo, + storage_map[rewritten_image.content_checksum], + rewritten_image.image_id, + rewritten_image.created, + rewritten_image.comment, + rewritten_image.command, + rewritten_image.compat_json, + rewritten_image.parent_image_id, + ) + images_map[image.image_id] = image + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) - if manifest_created: - for key, value in layers[-1].v1_metadata.labels.iteritems(): - model.label.create_manifest_label(tag_manifest, key, value, 'manifest') + # Store the manifest pointing to the tag. + leaf_layer_id = images_map[manifest.layers[-1].v1_metadata.image_id].image_id + v2.save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest.digest, manifest.bytes) # Queue all blob manifests for replication. # TODO(jschorr): Find a way to optimize this insertion. 
if features.STORAGE_REPLICATION: - for mdata in layers: - digest_str = str(mdata.digest) - blob_storage = storage_map.get(digest_str) - queue_storage_replication(namespace_name, blob_storage) + for extracted_v1_metadata in manifest.layers: + digest_str = str(extracted_v1_metadata.digest) + queue_storage_replication(namespace_name, storage_map[digest_str]) - return (repo, tag_name, manifest_digest) - - -def _write_manifest(namespace_name, repo_name, manifest): - (repo, tag_name, manifest_digest) = _write_manifest_itself(namespace_name, repo_name, manifest) - - # Spawn the repo_push event. - event_data = { - 'updated_tags': [tag_name], - } - - track_and_log('push_repo', repo, tag=tag_name) - spawn_notification(repo, 'repo_push', event_data) + track_and_log('push_repo', repo, tag=manifest.tag) + spawn_notification(repo, 'repo_push', {'updated_tags': [manifest.tag]}) metric_queue.repository_push.Inc(labelvalues=[namespace_name, repo_name, 'v2']) response = make_response('OK', 202) - response.headers['Docker-Content-Digest'] = manifest_digest + response.headers['Docker-Content-Digest'] = manifest.digest response.headers['Location'] = url_for('v2.fetch_manifest_by_digest', repository='%s/%s' % (namespace_name, repo_name), - manifest_ref=manifest_digest) + manifest_ref=manifest.digest) return response @@ -504,33 +208,34 @@ def _write_manifest(namespace_name, repo_name, manifest): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): - """ Delete the manifest specified by the digest. Note: there is no equivalent - method for deleting by tag name because it is forbidden by the spec. +def delete_manifest_by_digest(namespace_name, repo_name, digest): """ - try: - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: - # Without a tag name to reference, we can't make an attempt to generate the manifest + Delete the manifest specified by the digest. + + Note: there is no equivalent method for deleting by tag name because it is + forbidden by the spec. + """ + tag = v2.get_tag_by_manifest_digest(namespace_name, repo_name, digest) + if tag is None: + # TODO(jzelinskie): disambiguate between no manifest and no tag raise ManifestUnknown() # Mark the tag as no longer alive. - try: - model.tag.delete_tag(namespace_name, repo_name, manifest.tag.name) - except model.DataModelException: - # Tag is not alive. + deleted = v2.delete_tag(namespace_name, repo_name, tag.name) + if not deleted: + # Tag was not alive. raise ManifestUnknown() - track_and_log('delete_tag', manifest.tag.repository, - tag=manifest.tag.name, digest=manifest_ref) + track_and_log('delete_tag', tag.repository, tag=tag.name, digest=digest) return make_response('', 202) def _generate_and_store_manifest(namespace_name, repo_name, tag_name): - # First look up the tag object and its ancestors - image = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) - parents = model.image.get_parent_images(namespace_name, repo_name, image) + # Find the v1 metadata for this image and its parents. + v1_metadata = v2.docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) + parents_v1_metadata = v2.get_parents_docker_v1_metadata(namespace_name, repo_name, + v1_metadata.image_id) # If the manifest is being generated under the library namespace, then we make its namespace # empty. 
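
# An end-to-end sketch of the builder flow that the hunk below rewrites:
# layer_metadata is an illustrative list of (content_checksum, compat_json)
# pairs ordered leaf first, then its ancestors, and signing_key is a JWK such
# as docker_v2_signing_key.
from data.types import DockerSchema1ManifestBuilder

def build_signed_manifest(namespace_name, repo_name, tag_name, layer_metadata,
                          signing_key):
  builder = DockerSchema1ManifestBuilder(namespace_name, repo_name, tag_name)
  for content_checksum, compat_json in layer_metadata:
    builder.add_layer(content_checksum, compat_json)
  # build() signs the payload and returns a DockerSchema1Manifest whose
  # .digest and .bytes can be persisted, as _generate_and_store_manifest does.
  return builder.build(signing_key)
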
@@ -539,26 +244,18 @@ def _generate_and_store_manifest(namespace_name, repo_name, tag_name): manifest_namespace = '' # Create and populate the manifest builder - builder = SignedManifestBuilder(manifest_namespace, repo_name, tag_name) + builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name) # Add the leaf layer - builder.add_layer(image.storage.content_checksum, image.v1_json_metadata) + builder.add_layer(v1_metadata.content_checksum, v1_metadata.compat_json) - for parent in parents: - builder.add_layer(parent.storage.content_checksum, parent.v1_json_metadata) + for parent_v1_metadata in parents_v1_metadata: + builder.add_layer(parent_v1_metadata.content_checksum, parent_v1_metadata.compat_json) # Sign the manifest with our signing key. manifest = builder.build(docker_v2_signing_key) - # Write the manifest to the DB. If an existing manifest already exists, return the - # one found. - try: - return model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, - manifest.digest, manifest.bytes) - except IntegrityError as ie: - logger.debug('Got integrity error: %s', ie) - try: - return model.tag.load_tag_manifest(namespace_name, repo_name, tag_name) - except model.InvalidManifestException: - logger.exception('Exception when generating manifest') - raise model.DataModelException('Could not load or generate manifest') + # Write the manifest to the DB. + v2.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest.digest, + manifest.bytes) + return manifest From 3f722f880e9f1931ec0429e32ef4b76c86f196c9 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 26 Jul 2016 18:41:51 -0400 Subject: [PATCH 10/34] v2: add pagination decorator --- data/model/v2.py | 8 ++++++ endpoints/v2/__init__.py | 56 +++++++++++++++++++++++++++++++++++----- endpoints/v2/catalog.py | 22 ++++++---------- endpoints/v2/tag.py | 24 +++++++---------- endpoints/v2/v2auth.py | 4 +-- endpoints/v2/v2util.py | 42 ------------------------------ 6 files changed, 77 insertions(+), 79 deletions(-) delete mode 100644 endpoints/v2/v2util.py diff --git a/data/model/v2.py b/data/model/v2.py index b677b462e..e10ab9054 100644 --- a/data/model/v2.py +++ b/data/model/v2.py @@ -100,3 +100,11 @@ def synthesize_v1_image(repo, storage, image_id, created, comment, command, comp def save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, manifest_bytes): model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, manifest_bytes) + + +def repository_tags(namespace_name, repo_name, limit, offset): + return [Tag()] + + +def get_visible_repositories(username, limit, offset): + return [Repository()] diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py index 1ab42747f..42ac4afa4 100644 --- a/endpoints/v2/__init__.py +++ b/endpoints/v2/__init__.py @@ -2,13 +2,14 @@ import logging from functools import wraps from urlparse import urlparse +from urllib import urlencode from flask import Blueprint, make_response, url_for, request, jsonify from semantic_version import Spec import features -from app import app, metric_queue +from app import app, metric_queue, get_app_url from auth.auth_context import get_grant_context from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission, AdministerRepositoryPermission) @@ -19,12 +20,53 @@ from endpoints.v2.errors import V2RegistryException, Unauthorized from util.http import abort from util.registry.dockerver import docker_version from 
util.metrics.metricqueue import time_blueprint +from util.pagination import encrypt_page_token, decrypt_page_token logger = logging.getLogger(__name__) v2_bp = Blueprint('v2', __name__) time_blueprint(v2_bp, metric_queue) + +_MAX_RESULTS_PER_PAGE = 50 + + +def _paginate(limit_kwarg_name='limit', offset_kwarg_name='offset', + callback_kwarg_name='pagination_callback'): + def wrapper(func): + @wraps(func) + def wrapped(*args, **kwargs): + try: + requested_limit = int(request.args.get('n', _MAX_RESULTS_PER_PAGE)) + except ValueError: + requested_limit = 0 + + limit = max(min(requested_limit, _MAX_RESULTS_PER_PAGE), 1) + next_page_token = request.args.get('next_page', None) + + # Decrypt the next page token, if any. + offset = 0 + page_info = decrypt_page_token(next_page_token) + if page_info is not None: + # Note: we use offset here instead of ID >= n because one of the V2 queries is a UNION. + offset = page_info.get('offset', 0) + + def callback(num_results, response): + if num_results <= limit: + return + next_page_token = encrypt_page_token({'offset': limit+offset}) + link = get_app_url() + url_for(request.endpoint, **request.view_args) + link += '?%s; rel="next"' % urlencode({'n': limit, 'next_page': next_page_token}) + response.headers['Link'] = link + + kwargs[limit_kwarg_name] = limit + kwargs[offset_kwarg_name] = offset + kwargs[callback_kwarg_name] = callback + func(*args, **kwargs) + return wrapped + return wrapper + + @v2_bp.app_errorhandler(V2RegistryException) def handle_registry_v2_exception(error): response = jsonify({ @@ -104,8 +146,10 @@ def v2_support_enabled(): return response -from endpoints.v2 import v2auth -from endpoints.v2 import manifest -from endpoints.v2 import blob -from endpoints.v2 import tag -from endpoints.v2 import catalog +from endpoints.v2 import ( + blob, + catalog, + manifest, + tag, + v2auth, +) diff --git a/endpoints/v2/catalog.py b/endpoints/v2/catalog.py index c49b4091a..635378653 100644 --- a/endpoints/v2/catalog.py +++ b/endpoints/v2/catalog.py @@ -1,30 +1,24 @@ -from flask import jsonify, url_for +from flask import jsonify -from endpoints.v2 import v2_bp from auth.registry_jwt_auth import process_registry_jwt_auth, get_granted_entity from endpoints.decorators import anon_protect -from data import model -from endpoints.v2.v2util import add_pagination +from endpoints.v2 import v2_bp, _paginate @v2_bp.route('/_catalog', methods=['GET']) @process_registry_jwt_auth() @anon_protect -def catalog_search(): - url = url_for('v2.catalog_search') - +@_paginate() +def catalog_search(limit, offset, pagination_callback): username = None entity = get_granted_entity() if entity: username = entity.user.username - query = model.repository.get_visible_repositories(username, include_public=(username is None)) - link, query = add_pagination(query, url) - + visible_repositories = v2.get_visible_repositories(username, limit, offset) response = jsonify({ - 'repositories': ['%s/%s' % (repo.namespace_user.username, repo.name) for repo in query], + 'repositories': ['%s/%s' % (repo.namespace_name, repo.name) + for repo in visible_repositories], }) - if link is not None: - response.headers['Link'] = link - + pagination_callback(len(visible_repositories), response) return response diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py index 44e87c7c0..66a4e20ea 100644 --- a/endpoints/v2/tag.py +++ b/endpoints/v2/tag.py @@ -1,33 +1,27 @@ -from flask import jsonify, url_for +from flask import jsonify from auth.registry_jwt_auth import process_registry_jwt_auth from endpoints.common 
import parse_repository_name -from endpoints.v2 import v2_bp, require_repo_read +from endpoints.v2 import v2_bp, require_repo_read, _paginate from endpoints.v2.errors import NameUnknown -from endpoints.v2.v2util import add_pagination from endpoints.decorators import anon_protect -from data import model @v2_bp.route('//tags/list', methods=['GET']) @parse_repository_name() @process_registry_jwt_auth(scopes=['pull']) @require_repo_read @anon_protect -def list_all_tags(namespace_name, repo_name): - repository = model.repository.get_repository(namespace_name, repo_name) - if repository is None: +@_paginate() +def list_all_tags(namespace_name, repo_name, limit, offset, pagination_callback): + repo = v2.get_repository(namespace_name, repo_name) + if repo is None: raise NameUnknown() - query = model.tag.list_repository_tags(namespace_name, repo_name) - url = url_for('v2.list_all_tags', repository='%s/%s' % (namespace_name, repo_name)) - link, query = add_pagination(query, url) - + tags = v2.repository_tags(namespace_name, repo_name, limit, offset) response = jsonify({ 'name': '{0}/{1}'.format(namespace_name, repo_name), - 'tags': [tag.name for tag in query], + 'tags': [tag.name for tag in tags], }) - if link is not None: - response.headers['Link'] = link - + pagination_callback(len(tags), response) return response diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py index abae60b65..91de73fa4 100644 --- a/endpoints/v2/v2auth.py +++ b/endpoints/v2/v2auth.py @@ -1,6 +1,7 @@ import logging import re +from cachetools import lru_cache from flask import request, jsonify, abort from app import app, userevents, instance_keys @@ -9,7 +10,6 @@ from auth.auth import process_auth from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermission, CreateRepositoryPermission) -from cachetools import lru_cache from endpoints.v2 import v2_bp from endpoints.decorators import anon_protect from util.cache import no_cache @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) TOKEN_VALIDITY_LIFETIME_S = 60 * 60 # 1 hour SCOPE_REGEX_TEMPLATE = ( - r'^repository:((?:{}\/)?((?:[\.a-zA-Z0-9_\-]+\/)?[\.a-zA-Z0-9_\-]+)):((?:push|pull|\*)(?:,(?:push|pull|\*))*)$' + r'^repository:((?:{}\/)?((?:[\.a-zA-Z0-9_\-]+\/)?[\.a-zA-Z0-9_\-]+)):((?:push|pull|\*)(?:,(?:push|pull|\*))*)$' ) diff --git a/endpoints/v2/v2util.py b/endpoints/v2/v2util.py deleted file mode 100644 index df4a70fb9..000000000 --- a/endpoints/v2/v2util.py +++ /dev/null @@ -1,42 +0,0 @@ -from flask import request -from app import get_app_url -from util.pagination import encrypt_page_token, decrypt_page_token -import urllib -import logging - -_MAX_RESULTS_PER_PAGE = 50 - -def add_pagination(query, url): - """ Adds optional pagination to the given query by looking for the Docker V2 pagination request - args. - """ - try: - requested_limit = int(request.args.get('n', _MAX_RESULTS_PER_PAGE)) - except ValueError: - requested_limit = 0 - - limit = max(min(requested_limit, _MAX_RESULTS_PER_PAGE), 1) - next_page_token = request.args.get('next_page', None) - - # Decrypt the next page token, if any. - offset = 0 - page_info = decrypt_page_token(next_page_token) - if page_info is not None: - # Note: we use offset here instead of ID >= n because one of the V2 queries is a UNION. 
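
# Equivalent usage under the _paginate decorator introduced in this patch
# (renamed to paginate in PATCH 12). The /_example route and fetch_page helper
# are hypothetical; the sketch fetches limit + 1 rows so the callback can tell
# whether a next page exists and set the Link header accordingly.
@v2_bp.route('/_example', methods=['GET'])
@_paginate()
def example_list(limit, offset, pagination_callback):
  results = fetch_page(limit + 1, offset)
  response = jsonify({'results': results[:limit]})
  pagination_callback(len(results), response)
  return response
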
-    offset = page_info.get('offset', 0)
-    query = query.offset(offset)
-
-  query = query.limit(limit + 1)
-  url = get_app_url() + url
-
-  results = list(query)
-  if len(results) <= limit:
-    return None, results
-
-  # Add a link to the next page of results.
-  page_info = dict(offset=limit + offset)
-  next_page_token = encrypt_page_token(page_info)
-
-  link = url + '?' + urllib.urlencode(dict(n=limit, next_page=next_page_token))
-  link = link + '; rel="next"'
-  return link, results[0:-1]

From e6c99bb471cc358a8cb3f765390ad4acd357d697 Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Mon, 1 Aug 2016 20:46:31 -0400
Subject: [PATCH 11/34] re-ordered BlobUploading fields

---
 data/database.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/data/database.py b/data/database.py
index 35dd3e66a..1f18b8d84 100644
--- a/data/database.py
+++ b/data/database.py
@@ -1030,10 +1030,10 @@ class BlobUploading(BaseModel):
   repository = ForeignKeyField(Repository, index=True)
   location = ForeignKeyField(BlobPlacementLocation)
   byte_count = IntegerField(default=0)
-  sha_state = ResumableSHA256Field(null=True, default=resumablehashlib.sha256)
-  storage_metadata = JSONField(null=True, default={})
-  chunk_count = IntegerField(default=0)
   uncompressed_byte_count = IntegerField(null=True)
+  chunk_count = IntegerField(default=0)
+  storage_metadata = JSONField(null=True, default={})
+  sha_state = ResumableSHA256Field(null=True, default=resumablehashlib.sha256)
   piece_sha_state = ResumableSHA1Field(null=True)
   piece_hashes = Base64BinaryField(null=True)

From 21cbe0bd07ff61eab821081278b32d09654fad90 Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Mon, 1 Aug 2016 20:48:00 -0400
Subject: [PATCH 12/34] v2: mv _paginate paginate

---
 endpoints/v2/__init__.py | 4 ++--
 endpoints/v2/catalog.py  | 4 ++--
 endpoints/v2/tag.py      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py
index 42ac4afa4..6ac99ecdf 100644
--- a/endpoints/v2/__init__.py
+++ b/endpoints/v2/__init__.py
@@ -31,8 +31,8 @@ time_blueprint(v2_bp, metric_queue)
 _MAX_RESULTS_PER_PAGE = 50
 
 
-def _paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
-              callback_kwarg_name='pagination_callback'):
+def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
+             callback_kwarg_name='pagination_callback'):
   def wrapper(func):
     @wraps(func)
     def wrapped(*args, **kwargs):
diff --git a/endpoints/v2/catalog.py b/endpoints/v2/catalog.py
index 635378653..34b195dbc 100644
--- a/endpoints/v2/catalog.py
+++ b/endpoints/v2/catalog.py
@@ -2,12 +2,12 @@ from flask import jsonify
 
 from auth.registry_jwt_auth import process_registry_jwt_auth, get_granted_entity
 from endpoints.decorators import anon_protect
-from endpoints.v2 import v2_bp, _paginate
+from endpoints.v2 import v2_bp, paginate
 from data.model import v2
 
 
 @v2_bp.route('/_catalog', methods=['GET'])
 @process_registry_jwt_auth()
 @anon_protect
-@_paginate()
+@paginate()
 def catalog_search(limit, offset, pagination_callback):
   username = None
   entity = get_granted_entity()
   if entity:
     username = entity.user.username
diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py
index 66a4e20ea..11253aee0 100644
--- a/endpoints/v2/tag.py
+++ b/endpoints/v2/tag.py
@@ -2,7 +2,7 @@ from flask import jsonify
 
 from auth.registry_jwt_auth import process_registry_jwt_auth
 from endpoints.common import parse_repository_name
-from endpoints.v2 import v2_bp, require_repo_read, _paginate
+from endpoints.v2 import v2_bp, require_repo_read, paginate
 from data.model import v2
 from endpoints.v2.errors import NameUnknown
 from endpoints.decorators import anon_protect
@@ -11,7 +11,7 @@ from endpoints.decorators import anon_protect
 @process_registry_jwt_auth(scopes=['pull'])
 @require_repo_read
 @anon_protect
-@_paginate()
+@paginate()
 def list_all_tags(namespace_name, repo_name, limit, offset, pagination_callback):
   repo = v2.get_repository(namespace_name, repo_name)
   if repo is None:
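The pagination above keeps the page state in an encrypted token rather than a database cursor. A minimal sketch of the token round-trip, assuming only that encrypt_page_token and decrypt_page_token in util/pagination.py invert each other on dicts (their implementation is not shown in this series):

    # Sketch only; assumes util.pagination round-trips dicts.
    from util.pagination import encrypt_page_token, decrypt_page_token

    token = encrypt_page_token({'offset': 50})     # handed to the client as ?next_page=
    assert decrypt_page_token(token) == {'offset': 50}
    # The callback in paginate() encodes limit + offset, so each page resumes
    # exactly where the previous one stopped.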

From 3de6000428d60bc67090fe0bbb35327a31bb160f Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Mon, 1 Aug 2016 20:48:34 -0400
Subject: [PATCH 13/34] v2: refactor blob.py to use data.types

---
 data/model/v2.py     |  76 ++++++-
 data/types.py        |  12 +
 endpoints/v2/blob.py | 506 +++++++++++++++++++++++++------------
 3 files changed, 387 insertions(+), 207 deletions(-)

diff --git a/data/model/v2.py b/data/model/v2.py
index e10ab9054..6e9e570f8 100644
--- a/data/model/v2.py
+++ b/data/model/v2.py
@@ -1,8 +1,10 @@
 from data.types import (
+  Blob,
+  BlobUpload,
+  DockerV1Metadata,
+  ManifestJSON,
   Repository,
   Tag,
-  ManifestJSON,
-  DockerV1Metadata,
 )
 
 def get_repository(namespace_name, repo_name):
@@ -108,3 +110,73 @@ def repository_tags(namespace_name, repo_name, limit, offset):
 
 def get_visible_repositories(username, limit, offset):
   return [Repository()]
+
+
+def create_blob_upload(namespace_name, repo_name, upload_uuid, location_name, storage_metadata):
+  """
+  Creates a blob upload.
+
+  Returns False if the upload's repository does not exist.
+  """
+  try:
+    model.blob.initiate_upload(namespace_name, repo_name, upload_uuid, location_name,
+                               storage_metadata)
+    return True
+  except database.Repository.DoesNotExist:
+    return False
+
+
+def blob_upload_by_uuid(namespace_name, repo_name, upload_uuid):
+  try:
+    found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid)
+  except model.InvalidBlobUpload:
+    return None
+
+  return BlobUpload(
+    uuid=upload_uuid,
+    byte_count=found.byte_count,
+    uncompressed_byte_count=found.uncompressed_byte_count,
+    chunk_count=found.chunk_count,
+    sha_state=found.sha_state,
+    location_name=found.location.name,
+    storage_metadata=found.storage_metadata,
+    piece_sha_state=found.piece_sha_state,
+    piece_hashes=found.piece_hashes,
+  )
+
+
+def update_blob_upload(blob_upload):
+  # old implementation:
+  # blob_upload.save()
+  pass
+
+
+def delete_blob_upload(namespace_name, repo_name, upload_uuid):
+  try:
+    found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid)
+  except model.InvalidBlobUpload:
+    return
+
+  found.delete_instance()
+
+
+def create_blob_and_temp_tag(namespace_name, repo_name, expected_digest, location_name,
+                             byte_count, uncompressed_byte_count, link_expiration_s):
+  return model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, expected_digest,
+                                                    location_name, byte_count,
+                                                    link_expiration_s,
+                                                    uncompressed_byte_count)
+
+
+def blob_by_digest(namespace_name, repo_name, digest):
+  try:
+    return model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest)
+  except model.BlobDoesNotExist:
+    return None
+
+
+def create_bittorrent_pieces(blob_storage, piece_size, piece_bytes):
+  model.storage.save_torrent_info(blob_storage.id, piece_size, piece_bytes)
+
+
+def get_blob_path(blob):
+  # Once everything is moved over, this could be in util.registry and not even
+  # touch the database.
+  return model.storage.get_layer_path(blob)
diff --git a/data/types.py b/data/types.py
index f734f3506..e93c06539 100644
--- a/data/types.py
+++ b/data/types.py
@@ -344,3 +344,15 @@ DockerV1Metadata = namedtuple('DockerV1Metadata', ['namespace_name',
                                                    'command',
                                                    'parent_image_id',
                                                    'compat_json'])
+
+BlobUpload = namedtuple('BlobUpload', ['uuid',
+                                       'byte_count',
+                                       'uncompressed_byte_count',
+                                       'chunk_count',
+                                       'sha_state',
+                                       'location_name',
+                                       'storage_metadata',
+                                       'piece_sha_state',
+                                       'piece_hashes'])
+
+Blob = namedtuple('Blob', ['digest', 'size', 'locations'])
diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py
index 3136b4580..a5836bb61 100644
--- a/endpoints/v2/blob.py
+++ b/endpoints/v2/blob.py
@@ -7,7 +7,8 @@ import resumablehashlib
 
 from app import storage, app
 from auth.registry_jwt_auth import process_registry_jwt_auth
-from data import model, database
+from data import database
+from data.model import v2
 from digest import digest_tools
 from endpoints.common import parse_repository_name
 from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream
@@ -33,28 +34,6 @@ class _InvalidRangeHeader(Exception):
   pass
 
 
-def _base_blob_fetch(namespace_name, repo_name, digest):
-  """ Some work that is common to both GET and HEAD requests. Callers MUST check for proper
-      authorization before calling this method.
-  """
-  try:
-    found = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest)
-  except model.BlobDoesNotExist:
-    raise BlobUnknown()
-
-  headers = {
-    'Docker-Content-Digest': digest,
-  }
-
-  # Add the Accept-Ranges header if the storage engine supports resumable
-  # downloads.
-  if storage.get_supports_resumable_downloads(found.locations):
-    logger.debug('Storage supports resumable downloads')
-    headers['Accept-Ranges'] = 'bytes'
-
-  return found, headers
-
-
 @v2_bp.route(BLOB_DIGEST_ROUTE, methods=['HEAD'])
 @parse_repository_name()
 @process_registry_jwt_auth(scopes=['pull'])
@@ -62,12 +41,25 @@
 @anon_protect
 @cache_control(max_age=31436000)
 def check_blob_exists(namespace_name, repo_name, digest):
-  found, headers = _base_blob_fetch(namespace_name, repo_name, digest)
+  # Find the blob.
+  blob = v2.blob_by_digest(namespace_name, repo_name, digest)
+  if blob is None:
+    raise BlobUnknown()
 
+  # Build the response headers.
+  headers = {
+    'Docker-Content-Digest': digest,
+    'Content-Length': blob.size,
+    'Content-Type': BLOB_CONTENT_TYPE,
+  }
+
+  # If our storage supports range requests, let the Docker client know.
+  if storage.get_supports_resumable_downloads(blob.locations):
+    headers['Accept-Ranges'] = 'bytes'
+
+  # Write the response to the Docker client.
   response = make_response('')
   response.headers.extend(headers)
-  response.headers['Content-Length'] = found.image_size
-  response.headers['Content-Type'] = BLOB_CONTENT_TYPE
   return response
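BlobUpload and Blob are namedtuples, so values of these types are immutable records: updated upload state is carried forward by constructing a new value rather than by mutating in place. A standalone sketch of that pattern (stdlib only; the field list is abbreviated for illustration):

    from collections import namedtuple

    BlobUpload = namedtuple('BlobUpload', ['uuid', 'byte_count', 'chunk_count'])

    upload = BlobUpload(uuid='some-uuid', byte_count=0, chunk_count=0)
    # _replace returns a new record; the original is unchanged.
    upload = upload._replace(byte_count=upload.byte_count + 1024,
                             chunk_count=upload.chunk_count + 1)
    assert upload.byte_count == 1024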
@@ -78,31 +70,42 @@
 @anon_protect
 @cache_control(max_age=31536000)
 def download_blob(namespace_name, repo_name, digest):
-  found, headers = _base_blob_fetch(namespace_name, repo_name, digest)
+  # Find the blob.
+  blob = v2.blob_by_digest(namespace_name, repo_name, digest)
+  if blob is None:
+    raise BlobUnknown()
 
-  path = model.storage.get_layer_path(found)
+  # Build the response headers.
+  headers = {'Docker-Content-Digest': digest}
+
+  # If our storage supports range requests, let the Docker client know.
+  if storage.get_supports_resumable_downloads(blob.locations):
+    headers['Accept-Ranges'] = 'bytes'
+
+  # Find the storage path for the blob.
+  path = v2.get_blob_path(blob)
+
+  # Short-circuit by redirecting if the storage supports it.
   logger.debug('Looking up the direct download URL for path: %s', path)
-  direct_download_url = storage.get_direct_download_url(found.locations, path)
-
+  direct_download_url = storage.get_direct_download_url(blob.locations, path)
   if direct_download_url:
     logger.debug('Returning direct download URL')
     resp = redirect(direct_download_url)
     resp.headers.extend(headers)
     return resp
 
-  logger.debug('Streaming layer data')
-
-  # Close the database handle here for this process before we send the long download.
+  # Close the database connection before we stream the download.
+  logger.debug('Closing database connection before streaming layer data')
   database.close_db_filter(None)
 
-  headers['Content-Length'] = found.image_size
-  headers['Content-Type'] = BLOB_CONTENT_TYPE
-
-  return Response(storage.stream_read(found.locations, path), headers=headers)
-
-
-def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
-  return '{0}0-{1}'.format('bytes=' if with_bytes_prefix else '', num_uploaded_bytes - 1)
+  # Stream the response to the Docker client.
+  headers.update({
+    'Content-Length': blob.size,
+    'Content-Type': BLOB_CONTENT_TYPE,
+  })
+  return Response(storage.stream_read(blob.locations, path), headers=headers)
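From a client's perspective, download_blob either redirects to the storage engine or streams the blob itself. A hypothetical client-side sketch with the requests library (registry URL, repository, and digest are placeholders; auth omitted):

    import requests

    url = 'https://registry.example.com/v2/devtable/simple/blobs/sha256:' + '0' * 64
    resp = requests.get(url, allow_redirects=False)
    if resp.status_code == 302:
      # Storage supports direct download; fetch from the redirect target.
      data = requests.get(resp.headers['Location']).content
    else:
      # The registry streamed the blob itself.
      data = resp.content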
 
 
@@ -111,37 +113,50 @@ def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
 @v2_bp.route('/<repopath:repository>/blobs/uploads/', methods=['POST'])
 @parse_repository_name()
 @process_registry_jwt_auth(scopes=['pull', 'push'])
 @require_repo_write
 @anon_protect
 def start_blob_upload(namespace_name, repo_name):
+  # Begin the blob upload process in the database and storage.
   location_name = storage.preferred_locations[0]
   new_upload_uuid, upload_metadata = storage.initiate_chunked_upload(location_name)
-
-  try:
-    model.blob.initiate_upload(namespace_name, repo_name, new_upload_uuid, location_name,
-                               upload_metadata)
-  except database.Repository.DoesNotExist:
+  repository_exists = v2.create_blob_upload(namespace_name, repo_name, new_upload_uuid,
+                                            location_name, upload_metadata)
+  if not repository_exists:
     raise NameUnknown()
 
   digest = request.args.get('digest', None)
   if digest is None:
-    # The user will send the blob data in another request
+    # Short-circuit because the user will send the blob data in another request.
     accepted = make_response('', 202)
     accepted.headers['Location'] = url_for('v2.upload_chunk',
                                            repository='%s/%s' % (namespace_name, repo_name),
                                            upload_uuid=new_upload_uuid)
-    accepted.headers['Range'] = _render_range(0)
     accepted.headers['Docker-Upload-UUID'] = new_upload_uuid
     return accepted
-  else:
-    # The user plans to send us the entire body right now
-    blob_upload, upload_error = _upload_chunk(namespace_name, repo_name, new_upload_uuid)
-    blob_upload.save()
 
-    if upload_error:
-      logger.error('Got error when uploading chunk for blob %s under repository %s/%s: %s',
-                   namespace_name, repo_name, new_upload_uuid, upload_error)
-      _range_not_satisfiable(blob_upload.byte_count)
+  # The user plans to send us the entire body right now.
+  # Find the upload.
+  blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, new_upload_uuid)
+  if blob_upload is None:
+    raise BlobUploadUnknown()
 
-    return _finish_upload(namespace_name, repo_name, blob_upload, digest)
+  # Upload the chunk to storage while calculating some metadata and updating
+  # the upload state.
+  updated_blob_upload = _upload_chunk(blob_upload, *_start_offset_and_length(request.headers))
+  if updated_blob_upload is None:
+    _abort_range_not_satisfiable(blob_upload.byte_count, new_upload_uuid)
+
+  # Save the upload state to the database.
+  v2.update_blob_upload(updated_blob_upload)
+
+  # Finalize the upload process in the database and storage.
+  _finish_upload(namespace_name, repo_name, updated_blob_upload, digest)
+
+  # Write the response to the docker client.
+  response = make_response('', 201)
+  response.headers['Docker-Content-Digest'] = digest
+  response.headers['Location'] = url_for('v2.download_blob',
+                                         repository='%s/%s' % (namespace_name, repo_name),
+                                         digest=digest)
+  return response
 
 
@@ -150,33 +165,141 @@ def start_blob_upload(namespace_name, repo_name):
 @v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['GET'])
 @parse_repository_name()
 @process_registry_jwt_auth(scopes=['pull', 'push'])
 @require_repo_write
 @anon_protect
 def fetch_existing_upload(namespace_name, repo_name, upload_uuid):
-  try:
-    found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid)
-  except model.InvalidBlobUpload:
+  blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if blob_upload is None:
     raise BlobUploadUnknown()
 
-  # Note: Docker byte ranges are exclusive so we have to add one to the byte count.
   accepted = make_response('', 204)
-  accepted.headers['Range'] = _render_range(found.byte_count + 1)
-  accepted.headers['Docker-Upload-UUID'] = upload_uuid
+  accepted.headers.extend({
+    'Docker-Upload-UUID': upload_uuid,
+    'Range': _render_range(blob_upload.byte_count+1),  # Docker byte ranges are exclusive
+  })
   return accepted
+
+
+@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['PATCH'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def upload_chunk(namespace_name, repo_name, upload_uuid):
+  # Find the upload.
+  blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if blob_upload is None:
+    raise BlobUploadUnknown()
+
+  # Upload the chunk to storage while calculating some metadata and updating
+  # the upload state.
+  updated_blob_upload = _upload_chunk(blob_upload, *_start_offset_and_length(request.headers))
+  if updated_blob_upload is None:
+    _abort_range_not_satisfiable(blob_upload.byte_count, upload_uuid)
+
+  # Save the upload state to the database.
+  v2.update_blob_upload(updated_blob_upload)
+
+  # Write the response to the Docker client.
+  accepted = make_response('', 204)
+  accepted.headers.extend({
+    'Location': _current_request_path(),
+    'Range': _render_range(updated_blob_upload.byte_count, with_bytes_prefix=False),
+    'Docker-Upload-UUID': upload_uuid,
+  })
+  return accepted
+
+
+@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['PUT'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid):
+  # Ensure the digest is present before proceeding.
+  digest = request.args.get('digest', None)
+  if digest is None:
+    raise BlobUploadInvalid()
+
+  # Find the upload.
+  blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if blob_upload is None:
+    raise BlobUploadUnknown()
+
+  # Upload the chunk to storage while calculating some metadata and updating
+  # the upload state.
+  updated_blob_upload = _upload_chunk(blob_upload, *_start_offset_and_length(request.headers))
+  if updated_blob_upload is None:
+    _abort_range_not_satisfiable(blob_upload.byte_count, upload_uuid)
+
+  # Finalize the upload process in the database and storage.
+  _finish_upload(namespace_name, repo_name, updated_blob_upload, digest)
+
+  # Write the response to the Docker client.
+  response = make_response('', 201)
+  response.headers.extend({
+    'Docker-Content-Digest': digest,
+    'Location': url_for('v2.download_blob', repository='%s/%s' % (namespace_name, repo_name),
+                        digest=digest)
+  })
+  return response
+
+
+@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['DELETE'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def cancel_upload(namespace_name, repo_name, upload_uuid):
+  upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if upload is None:
+    raise BlobUploadUnknown()
+
+  # We delete the record for the upload first, since if the partial upload in
+  # storage fails to delete, it doesn't break anything
+  v2.delete_blob_upload(namespace_name, repo_name, upload_uuid)
+  storage.cancel_chunked_upload({upload.location_name}, upload.uuid, upload.storage_metadata)
+
+  return make_response('', 204)
+
+
+@v2_bp.route('/<repopath:repository>/blobs/<digest>', methods=['DELETE'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def delete_digest(namespace_name, repo_name, upload_uuid):
+  # We do not support deleting arbitrary digests, as they break repo images.
+  raise Unsupported()
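Together, the POST/PATCH/PUT handlers above implement the Docker V2 blob push flow. A compressed, hypothetical client-side sketch of the monolithic variant (requests assumed to be available; URLs are placeholders; auth and error handling omitted):

    import hashlib
    import requests
    from urlparse import urljoin  # Python 2, matching the codebase

    registry = 'https://registry.example.com'
    base = registry + '/v2/devtable/simple'
    layer = b'example layer bytes'
    digest = 'sha256:' + hashlib.sha256(layer).hexdigest()

    # POST starts the upload; the Location header names the new upload.
    started = requests.post(base + '/blobs/uploads/')
    upload_url = urljoin(registry, started.headers['Location'])

    # PUT with ?digest= sends all of the data and finalizes in one request.
    finished = requests.put(upload_url, params={'digest': digest}, data=layer)
    assert finished.status_code == 201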
""" - try: - found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) - except model.InvalidBlobUpload: - raise BlobUploadUnknown() - start_offset, length = 0, -1 - range_header = request.headers.get('range', None) + range_header = headers.get('range', None) if range_header is not None: try: start_offset, length = _parse_range_header(range_header) except _InvalidRangeHeader: - _range_not_satisfiable(found.byte_count) + return None, None + return start_offset, length - if start_offset > 0 and start_offset > found.byte_count: - _range_not_satisfiable(found.byte_count) - location_set = {found.location.name} +def _upload_chunk(blob_upload, start_offset, length): + """ + Calculates metadata while uploading a chunk to storage. + + Returns a BlobUpload object or None if there was a failure. + """ + # Check for invalidate arguments. + if None in {blob_upload, start_offset, length}: + return None + if start_offset > 0 and start_offset > blob_upload.byte_count: + return None + + location_set = {blob_upload.location_name} upload_error = None with database.CloseForLongOperation(app.config): input_fp = get_input_stream(request) - if start_offset > 0 and start_offset < found.byte_count: + if start_offset > 0 and start_offset < blob_upload.byte_count: # Skip the bytes which were received on a previous push, which are already stored and # included in the sha calculation - overlap_size = found.byte_count - start_offset + overlap_size = blob_upload.byte_count - start_offset input_fp = StreamSlice(input_fp, overlap_size) # Update our upload bounds to reflect the skipped portion of the overlap - start_offset = found.byte_count + start_offset = blob_upload.byte_count length = max(length - overlap_size, 0) # We use this to escape early in case we have already processed all of the bytes the user # wants to upload if length == 0: - return found, None + return blob_upload - input_fp = wrap_with_handler(input_fp, found.sha_state.update) + input_fp = wrap_with_handler(input_fp, blob_upload.sha_state.update) # Add a hasher for calculating SHA1s for torrents if this is the first chunk and/or we have # already calculated hash data for the previous chunk(s). piece_hasher = None - if found.chunk_count == 0 or found.piece_sha_state: - initial_sha1_value = found.piece_sha_state or resumablehashlib.sha1() - initial_sha1_pieces_value = found.piece_hashes or '' + if blob_upload.chunk_count == 0 or blob_upload.piece_sha_state: + initial_sha1_value = blob_upload.piece_sha_state or resumablehashlib.sha1() + initial_sha1_pieces_value = blob_upload.piece_hashes or '' piece_hasher = PieceHasher(app.config['BITTORRENT_PIECE_SIZE'], start_offset, - initial_sha1_pieces_value, - initial_sha1_value) + initial_sha1_pieces_value, initial_sha1_value) input_fp = wrap_with_handler(input_fp, piece_hasher.update) @@ -252,147 +381,114 @@ def _upload_chunk(namespace_name, repo_name, upload_uuid): # stream so we can determine the uncompressed size. We'll throw out this data if another chunk # comes in, but in the common case Docker only sends one chunk. 
    size_info = None
-    if start_offset == 0 and found.chunk_count == 0:
+    if start_offset == 0 and blob_upload.chunk_count == 0:
      size_info, fn = calculate_size_handler()
      input_fp = wrap_with_handler(input_fp, fn)
 
-    chunk_result = storage.stream_upload_chunk(location_set, upload_uuid, start_offset, length,
-                                               input_fp, found.storage_metadata,
-                                               content_type=BLOB_CONTENT_TYPE)
-    length_written, new_metadata, upload_error = chunk_result
+    try:
+      length_written, new_metadata, error = storage.stream_upload_chunk(
+        location_set,
+        blob_upload.uuid,
+        start_offset,
+        length,
+        input_fp,
+        blob_upload.storage_metadata,
+        content_type=BLOB_CONTENT_TYPE,
+      )
+      if error is not None:
+        return None
+    except InvalidChunkException:
+      return None
 
  # If we determined an uncompressed size and this is the first chunk, add it to the blob.
  # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
-  if size_info is not None and found.chunk_count == 0 and size_info.is_valid:
-    found.uncompressed_byte_count = size_info.uncompressed_size
+  if size_info is not None and blob_upload.chunk_count == 0 and size_info.is_valid:
+    blob_upload.uncompressed_byte_count = size_info.uncompressed_size
  elif length_written > 0:
    # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
    # know the uncompressed size.
-    found.uncompressed_byte_count = None
+    blob_upload.uncompressed_byte_count = None
 
  if piece_hasher is not None:
-    found.piece_hashes = piece_hasher.piece_hashes
-    found.piece_sha_state = piece_hasher.hash_fragment
-
-  found.storage_metadata = new_metadata
-  found.byte_count += length_written
-  found.chunk_count += 1
-  return found, upload_error
+    blob_upload.piece_hashes = piece_hasher.piece_hashes
+    blob_upload.piece_sha_state = piece_hasher.hash_fragment
+  blob_upload.storage_metadata = new_metadata
+  blob_upload.byte_count += length_written
+  blob_upload.chunk_count += 1
+  return blob_upload
 
 
-def _finish_upload(namespace_name, repo_name, upload_obj, expected_digest):
-  # Verify that the digest's SHA matches that of the uploaded data.
-  computed_digest = digest_tools.sha256_digest_from_hashlib(upload_obj.sha_state)
+def _validate_digest(blob_upload, expected_digest):
+  """
+  Verifies that the digest's SHA matches that of the uploaded data.
+  """
+  computed_digest = digest_tools.sha256_digest_from_hashlib(blob_upload.sha_state)
   if not digest_tools.digests_equal(computed_digest, expected_digest):
     logger.error('Digest mismatch for upload %s: Expected digest %s, found digest %s',
                  upload_obj.uuid, expected_digest, computed_digest)
     raise BlobUploadInvalid(detail={'reason': 'Digest mismatch on uploaded blob'})
 
+
+def _finalize_blob_storage(blob_upload, expected_digest):
+  """
+  When an upload is successful, this ends the uploading process from the
+  storage's perspective.
+
+  Returns True if the blob already existed.
+ """ final_blob_location = digest_tools.content_path(expected_digest) # Move the storage into place, or if this was a re-upload, cancel it with database.CloseForLongOperation(app.config): - already_exists = storage.exists({upload_obj.location.name}, final_blob_location) - if already_exists: - # It already existed, clean up our upload which served as proof that we had the file - storage.cancel_chunked_upload({upload_obj.location.name}, upload_obj.uuid, - upload_obj.storage_metadata) + already_existed = storage.exists({blob_upload.location_name}, final_blob_location) + if already_existed: + # It already existed, clean up our upload which served as proof that the + # uploader had the blob. + storage.cancel_chunked_upload({blob_upload.location_name}, blob_upload.uuid, + blob_upload.storage_metadata) else: # We were the first ones to upload this image (at least to this location) # Let's copy it into place - storage.complete_chunked_upload({upload_obj.location.name}, upload_obj.uuid, - final_blob_location, upload_obj.storage_metadata) - - # Mark the blob as uploaded. - blob_storage = model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, expected_digest, - upload_obj.location, - upload_obj.byte_count, - app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'], - upload_obj.uncompressed_byte_count) - - if upload_obj.piece_sha_state is not None and not already_exists: - piece_bytes = upload_obj.piece_hashes + upload_obj.piece_sha_state.digest() - model.storage.save_torrent_info(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes) - - # Delete the upload tracking row. - upload_obj.delete_instance() - - response = make_response('', 201) - response.headers['Docker-Content-Digest'] = expected_digest - response.headers['Location'] = url_for('v2.download_blob', - repository='%s/%s' % (namespace_name, repo_name), - digest=expected_digest) - return response + storage.complete_chunked_upload({blob_upload.location_name}, blob_upload.uuid, + final_blob_location, blob_upload.storage_metadata) + return already_existed -@v2_bp.route('//blobs/uploads/', methods=['PATCH']) -@parse_repository_name() -@process_registry_jwt_auth(scopes=['pull', 'push']) -@require_repo_write -@anon_protect -def upload_chunk(namespace_name, repo_name, upload_uuid): - blob_upload, upload_error = _upload_chunk(namespace_name, repo_name, upload_uuid) - blob_upload.save() +def _finalize_blob_database(namespace_name, repo_name, blob_upload, digest, already_existed): + """ + When an upload is successful, this ends the uploading process from the + database's perspective. + """ + # Create the blob and temporarily tag it. + blob_storage = v2.create_blob_and_temp_tag( + namespace_name, + repo_name, + digest, + blob_upload.location_name, + blob_upload.byte_count, + blob_upload.uncompressed_byte_count, + app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'], + ) - if upload_error: - logger.error('Got error when uploading chunk for blob %s under repository %s/%s: %s', - namespace_name, repo_name, upload_uuid, upload_error) - _range_not_satisfiable(blob_upload.byte_count) + # If it doesn't already exist, create the BitTorrent pieces for the blob. 
+  if blob_upload.piece_sha_state is not None and not already_existed:
+    piece_bytes = blob_upload.piece_hashes + blob_upload.piece_sha_state.digest()
+    v2.create_bittorrent_pieces(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes)
 
-  accepted = make_response('', 204)
-  accepted.headers['Location'] = _current_request_path()
-  accepted.headers['Range'] = _render_range(blob_upload.byte_count, with_bytes_prefix=False)
-  accepted.headers['Docker-Upload-UUID'] = upload_uuid
-  return accepted
+  # Delete the blob upload.
+  v2.delete_blob_upload(namespace_name, repo_name, blob_upload.uuid)
 
 
-@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['PUT'])
-@parse_repository_name()
-@process_registry_jwt_auth(scopes=['pull', 'push'])
-@require_repo_write
-@anon_protect
-def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid):
-  digest = request.args.get('digest', None)
-  if digest is None:
-    raise BlobUploadInvalid(detail={'reason': 'Missing digest arg on monolithic upload'})
-
-  blob_upload, upload_error = _upload_chunk(namespace_name, repo_name, upload_uuid)
-  blob_upload.save()
-
-  if upload_error:
-    logger.error('Got error when uploading chunk for blob %s under repository %s/%s: %s',
-                 namespace_name, repo_name, upload_uuid, upload_error)
-    _range_not_satisfiable(blob_upload.byte_count)
-
-  return _finish_upload(namespace_name, repo_name, blob_upload, digest)
-
-
-@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['DELETE'])
-@parse_repository_name()
-@process_registry_jwt_auth(scopes=['pull', 'push'])
-@require_repo_write
-@anon_protect
-def cancel_upload(namespace_name, repo_name, upload_uuid):
-  try:
-    found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid)
-  except model.InvalidBlobUpload:
-    raise BlobUploadUnknown()
-
-  # We delete the record for the upload first, since if the partial upload in
-  # storage fails to delete, it doesn't break anything
-  found.delete_instance()
-  storage.cancel_chunked_upload({found.location.name}, found.uuid, found.storage_metadata)
-
-  return make_response('', 204)
-
-
-@v2_bp.route('/<repopath:repository>/blobs/<digest>', methods=['DELETE'])
-@parse_repository_name()
-@process_registry_jwt_auth(scopes=['pull', 'push'])
-@require_repo_write
-@anon_protect
-def delete_digest(namespace_name, repo_name, upload_uuid):
-  # We do not support deleting arbitrary digests, as they break repo images.
-  raise Unsupported()
+def _finish_upload(namespace_name, repo_name, blob_upload, digest):
+  """
+  When an upload is successful, this ends the uploading process.
+ """ + _validate_digest(blob_upload, digest) + _finalize_blob_database( + namespace_name, + repo_name, + blob_upload, + digest, + _finalize_blob_storage(blob_upload, digest), + ) From a516c08deba590b8007173d360746a326183fa8c Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Mon, 1 Aug 2016 20:59:39 -0400 Subject: [PATCH 14/34] v2: refactor auth to use data.types --- data/model/v2.py | 8 ++++++++ endpoints/v2/v2auth.py | 16 +++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/data/model/v2.py b/data/model/v2.py index 6e9e570f8..52a4d1e3d 100644 --- a/data/model/v2.py +++ b/data/model/v2.py @@ -7,6 +7,14 @@ from data.types import ( Tag, ) +def create_repository(namespace_name, repo_name, user): + model.repository.create_repository(namespace, reponame, user) + + +def repository_is_public(namespace_name, repo_name): + model.repository.repository_is_public(namespace, reponame)): + + def get_repository(namespace_name, repo_name): repo = model.repository.get_repository(namespace_name, repo_name) if repo is None: diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py index 91de73fa4..e5a617df8 100644 --- a/endpoints/v2/v2auth.py +++ b/endpoints/v2/v2auth.py @@ -5,7 +5,6 @@ from cachetools import lru_cache from flask import request, jsonify, abort from app import app, userevents, instance_keys -from data import model from auth.auth import process_auth from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermission, @@ -21,9 +20,7 @@ logger = logging.getLogger(__name__) TOKEN_VALIDITY_LIFETIME_S = 60 * 60 # 1 hour -SCOPE_REGEX_TEMPLATE = ( - r'^repository:((?:{}\/)?((?:[\.a-zA-Z0-9_\-]+\/)?[\.a-zA-Z0-9_\-]+)):((?:push|pull|\*)(?:,(?:push|pull|\*))*)$' -) +SCOPE_REGEX_TEMPLATE = r'^repository:((?:{}\/)?((?:[\.a-zA-Z0-9_\-]+\/)?[\.a-zA-Z0-9_\-]+)):((?:push|pull|\*)(?:,(?:push|pull|\*))*)$' @lru_cache(maxsize=1) @@ -38,8 +35,9 @@ def get_scope_regex(): @no_cache @anon_protect def generate_registry_jwt(): - """ This endpoint will generate a JWT conforming to the Docker registry v2 auth spec: - https://docs.docker.com/registry/spec/auth/token/ + """ + This endpoint will generate a JWT conforming to the Docker Registry v2 Auth Spec: + https://docs.docker.com/registry/spec/auth/token/ """ audience_param = request.args.get('service') logger.debug('Request audience: %s', audience_param) @@ -97,7 +95,7 @@ def generate_registry_jwt(): if user is not None or token is not None: # Lookup the repository. If it exists, make sure the entity has modify # permission. Otherwise, make sure the entity has create permission. - repo = model.repository.get_repository(namespace, reponame) + repo = v2.get_repository(namespace, reponame) if repo: if ModifyRepositoryPermission(namespace, reponame).can(): final_actions.append('push') @@ -106,7 +104,7 @@ def generate_registry_jwt(): else: if CreateRepositoryPermission(namespace).can() and user is not None: logger.debug('Creating repository: %s/%s', namespace, reponame) - model.repository.create_repository(namespace, reponame, user) + v2.create_repository(namespace, reponame, user) final_actions.append('push') else: logger.debug('No permission to create repository %s/%s', namespace, reponame) @@ -114,7 +112,7 @@ def generate_registry_jwt(): if 'pull' in actions: # Grant pull if the user can read the repo or it is public. 

From 32a6c22b43a88e8c2461f8451484fc9c9a631514 Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Tue, 2 Aug 2016 18:45:30 -0400
Subject: [PATCH 15/34] mv data/types image

This change also merges formats into the new image module.
---
 data/model/v2.py                          |  25 +--
 endpoints/v2/__init__.py                  |   2 +-
 endpoints/v2/blob.py                      |   9 +-
 endpoints/v2/manifest.py                  |  27 +--
 endpoints/verbs.py                        |  18 +-
 formats/__init__.py                       |   0
 formats/tarimageformatter.py              |  56 ------
 image/__init__.py                         | 103 ++++++++++
 formats/aci.py => image/appc/__init__.py  |  43 ++--
 image/docker/__init__.py                  |  10 +
 data/types.py => image/docker/schema1.py  | 245 ++++++++++++-----------
 image/docker/schema2.py                   |  11 +
 {formats => image/docker}/squashed.py     |  35 ++--
 image/docker/v1.py                        |  16 ++
 14 files changed, 342 insertions(+), 258 deletions(-)
 delete mode 100644 formats/__init__.py
 delete mode 100644 formats/tarimageformatter.py
 create mode 100644 image/__init__.py
 rename formats/aci.py => image/appc/__init__.py (86%)
 create mode 100644 image/docker/__init__.py
 rename data/types.py => image/docker/schema1.py (55%)
 create mode 100644 image/docker/schema2.py
 rename {formats => image/docker}/squashed.py (85%)
 create mode 100644 image/docker/v1.py

diff --git a/data/model/v2.py b/data/model/v2.py
index 52a4d1e3d..197ebebe0 100644
--- a/data/model/v2.py
+++ b/data/model/v2.py
@@ -1,11 +1,5 @@
-from data.types import (
-  Blob,
-  BlobUpload,
-  DockerV1Metadata,
-  ManifestJSON,
-  Repository,
-  Tag,
-)
+from image import Blob, BlobUpload, ManifestJSON, Repository, Tag
+from image.docker.v1 import DockerV1Metadata
 
 def create_repository(namespace_name, repo_name, user):
   model.repository.create_repository(namespace_name, repo_name, user)
@@ -75,14 +69,13 @@ def docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name):
 
 def docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids):
   images_query = model.image.lookup_repository_images(repo, all_image_ids)
-  return [DockerV1Metadata(
-    namespace_name=namespace_name,
-    repo_name=repo_name,
-    image_id=image.docker_image_id,
-    checksum=image.v1_checksum,
-    content_checksum=image.content_checksum,
-    compat_json=image.v1_json_metadata,
-  ) for image in images_query]
+  return {image.docker_image_id: DockerV1Metadata(namespace_name=namespace_name,
+                                                  repo_name=repo_name,
+                                                  image_id=image.docker_image_id,
+                                                  checksum=image.v1_checksum,
+                                                  content_checksum=image.content_checksum,
+                                                  compat_json=image.v1_json_metadata)
+          for image in images_query}
 
 
 def get_parents_docker_v1_metadata(namespace_name, repo_name, image_id):
diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py
index 6ac99ecdf..97ecc40e6 100644
--- a/endpoints/v2/__init__.py
+++ b/endpoints/v2/__init__.py
@@ -54,7 +54,7 @@ def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
       def callback(num_results, response):
         if num_results <= limit:
           return
-        next_page_token = encrypt_page_token({'offset': limit+offset})
+        next_page_token = encrypt_page_token({'offset': limit + offset})
         link = get_app_url() + url_for(request.endpoint, **request.view_args)
         link += '?%s; rel="next"' % urlencode({'n': limit, 'next_page': next_page_token})
         response.headers['Link'] = link
diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py
index a5836bb61..bd2dc2a5b 100644
--- a/endpoints/v2/blob.py
+++ b/endpoints/v2/blob.py
@@ -216,7 +216,7 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid):
   # Ensure the digest is present before proceeding.
   digest = request.args.get('digest', None)
   if digest is None:
-    raise BlobUploadInvalid()
+    raise BlobUploadInvalid(detail={'reason': 'Missing digest arg on monolithic upload'})
 
   # Find the upload.
   blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
@@ -271,6 +271,9 @@ def delete_digest(namespace_name, repo_name, upload_uuid):
 
 def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
+  """
+  Returns a string formatted to be used in the Range header.
+  """
   return '{0}0-{1}'.format('bytes=' if with_bytes_prefix else '', num_uploaded_bytes - 1)
@@ -327,6 +330,7 @@ def _start_offset_and_length(headers):
       start_offset, length = _parse_range_header(range_header)
     except _InvalidRangeHeader:
       return None, None
+
   return start_offset, length
@@ -339,6 +343,7 @@ def _upload_chunk(blob_upload, start_offset, length):
   # Check for invalid arguments.
   if blob_upload is None or start_offset is None or length is None:
     return None
+
   if start_offset > 0 and start_offset > blob_upload.byte_count:
     return None
@@ -425,7 +430,7 @@ def _validate_digest(blob_upload, expected_digest):
   computed_digest = digest_tools.sha256_digest_from_hashlib(blob_upload.sha_state)
   if not digest_tools.digests_equal(computed_digest, expected_digest):
     logger.error('Digest mismatch for upload %s: Expected digest %s, found digest %s',
-                 upload_obj.uuid, expected_digest, computed_digest)
+                 blob_upload.uuid, expected_digest, computed_digest)
     raise BlobUploadInvalid(detail={'reason': 'Digest mismatch on uploaded blob'})
diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py
index cb92b1ebe..a5adfac89 100644
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@@ -9,13 +9,6 @@ import features
 from app import docker_v2_signing_key, app, metric_queue
 from auth.registry_jwt_auth import process_registry_jwt_auth
 from data import model
-from data.types import (
-  DockerSchema1Manifest,
-  DockerSchema1ManifestBuilder,
-  ManifestException,
-  DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
-  DOCKER_SCHEMA2_CONTENT_TYPES,
-)
 from digest import digest_tools
 from endpoints.common import parse_repository_name
 from endpoints.decorators import anon_protect
@@ -24,6 +17,9 @@ from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown,
                                  NameInvalid)
 from endpoints.trackhelper import track_and_log
 from endpoints.notificationhelper import spawn_notification
+from image.docker import ManifestException
+from image.docker.schema1 import DockerSchema1Manifest, DockerSchema1ManifestBuilder
+from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES
 from util.registry.replication import queue_storage_replication
 from util.names import VALID_TAG_PATTERN
@@ -56,7 +52,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name):
   metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2'])
 
   response = make_response(manifest.bytes, 200)
-  response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
+  response.headers['Content-Type'] = manifest.content_type
   response.headers['Docker-Content-Digest'] = manifest.digest
   return response
@@ -78,7 +74,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
   metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2'])
 
   response = make_response(manifest.json, 200)
-  response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
+  response.headers['Content-Type'] = manifest.content_type
   response.headers['Docker-Content-Digest'] = manifest.digest
   return response
@@ -151,16 +147,15 @@ def _write_manifest(namespace_name, repo_name, manifest):
   # Ensure all the blobs in the manifest exist.
   storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums)
   storage_map = {storage.content_checksum: storage for storage in storage_query}
-  for extracted_layer_metadata in manifest.layers:
-    digest_str = str(extracted_layer_metadata.digest)
+  for layer in manifest.layers:
+    digest_str = str(layer.digest)
     if digest_str not in storage_map:
       raise BlobUnknown(detail={'digest': digest_str})
 
   # Lookup all the images and their parent images (if any) inside the manifest.
   # This will let us know which v1 images we need to synthesize and which ones are invalid.
   all_image_ids = list(manifest.docker_image_ids | manifest.parent_image_ids)
-  images = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids)
-  images_map = {image.image_id: image for image in images}
+  images_map = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids)
 
   # Rewrite any v1 image IDs that do not match the checksum in the database.
   try:
@@ -181,14 +176,14 @@ def _write_manifest(namespace_name, repo_name, manifest):
     raise ManifestInvalid(detail={'message': me.message})
 
   # Store the manifest pointing to the tag.
-  leaf_layer_id = images_map[manifest.layers[-1].v1_metadata.image_id].image_id
+  leaf_layer_id = images_map[manifest.leaf_layer.v1_metadata.image_id].image_id
   v2.save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest.digest,
                    manifest.bytes)
 
   # Queue all blob manifests for replication.
   # TODO(jschorr): Find a way to optimize this insertion.
   if features.STORAGE_REPLICATION:
-    for extracted_v1_metadata in manifest.layers:
-      digest_str = str(extracted_v1_metadata.digest)
+    for layer in manifest.layers:
+      digest_str = str(layer.digest)
       queue_storage_replication(namespace_name, storage_map[digest_str])
 
   track_and_log('push_repo', repo, tag=manifest.tag)
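_write_manifest relies on the layer ordering guaranteed by the schema1 parser introduced later in this patch: layers run from base to leaf, so leaf_layer is simply the last entry. Illustrative usage, assuming manifest is a parsed DockerSchema1Manifest:

    for layer in manifest.layers:          # base image first, leaf image last
      digest_str = str(layer.digest)
      image_id = layer.v1_metadata.image_id

    assert manifest.leaf_layer == manifest.layers[-1]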
diff --git a/endpoints/verbs.py b/endpoints/verbs.py
index eff2ca35c..933fc9b0a 100644
--- a/endpoints/verbs.py
+++ b/endpoints/verbs.py
@@ -11,18 +11,18 @@ from auth.auth import process_auth
 from auth.auth_context import get_authenticated_user
 from auth.permissions import ReadRepositoryPermission
 from data import model, database
-from endpoints.trackhelper import track_and_log
+from endpoints.common import route_show_if, parse_repository_name
 from endpoints.decorators import anon_protect
+from endpoints.trackhelper import track_and_log
+from endpoints.v2.blob import BLOB_DIGEST_ROUTE
+from image.appc import AppCImageFormatter
+from image.docker.squashed import SquashedDockerImageFormatter
+from storage import Storage
+from util.registry.filelike import wrap_with_handler
 from util.registry.queuefile import QueueFile
 from util.registry.queueprocess import QueueProcess
 from util.registry.torrent import (make_torrent, per_user_torrent_filename, public_torrent_filename,
                                    PieceHasher)
-from util.registry.filelike import wrap_with_handler
-from formats.squashed import SquashedDockerImage
-from formats.aci import ACIImage
-from storage import Storage
-from endpoints.v2.blob import BLOB_DIGEST_ROUTE
-from endpoints.common import route_show_if, parse_repository_name
 
 verbs = Blueprint('verbs', __name__)
@@ -372,7 +372,7 @@ def get_aci_signature(server, namespace, repository, tag, os, arch):
 @verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/aci/<os>/<arch>/', methods=['GET', 'HEAD'])
 @process_auth
 def get_aci_image(server, namespace, repository, tag, os, arch):
-  return _repo_verb(namespace, repository, tag, 'aci', ACIImage(),
+  return _repo_verb(namespace, repository, tag, 'aci', AppCImageFormatter(),
                     sign=True, checker=os_arch_checker(os, arch), os=os, arch=arch)
@@ -380,7 +380,7 @@ def get_aci_image(server, namespace, repository, tag, os, arch):
 @verbs.route('/squash/<namespace>/<repository>/<tag>', methods=['GET'])
 @process_auth
 def get_squashed_tag(namespace, repository, tag):
-  return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImage())
+  return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImageFormatter())
 
 
 @route_show_if(features.BITTORRENT)
diff --git a/formats/__init__.py b/formats/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/formats/tarimageformatter.py b/formats/tarimageformatter.py
deleted file mode 100644
index 2274af85e..000000000
--- a/formats/tarimageformatter.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import tarfile
-from util.registry.gzipwrap import GzipWrap
-
-class TarImageFormatter(object):
-  """ Base class for classes which produce a TAR containing image and layer data. """
-
-  def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
-                   get_image_iterator, get_layer_iterator, get_image_json):
-    """ Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this
-        class's implementation.
- """ - return GzipWrap(self.stream_generator(namespace, repository, tag, - synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator, - get_image_json)) - - def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator, get_image_json): - raise NotImplementedError - - def tar_file(self, name, contents, mtime=None): - """ Returns the TAR binary representation for a file with the given name and file contents. """ - length = len(contents) - tar_data = self.tar_file_header(name, length, mtime=mtime) - tar_data += contents - tar_data += self.tar_file_padding(length) - return tar_data - - def tar_file_padding(self, length): - """ Returns TAR file padding for file data of the given length. """ - if length % 512 != 0: - return '\0' * (512 - (length % 512)) - - return '' - - def tar_file_header(self, name, file_size, mtime=None): - """ Returns TAR file header data for a file with the given name and size. """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.REGTYPE - info.size = file_size - - if mtime is not None: - info.mtime = mtime - return info.tobuf() - - def tar_folder(self, name, mtime=None): - """ Returns TAR file header data for a folder with the given name. """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.DIRTYPE - - if mtime is not None: - info.mtime = mtime - - # allow the directory to be readable by non-root users - info.mode = 0755 - return info.tobuf() diff --git a/image/__init__.py b/image/__init__.py new file mode 100644 index 000000000..e09f7ae72 --- /dev/null +++ b/image/__init__.py @@ -0,0 +1,103 @@ +import tarfile + +from collections import namedtuple + +from util.registry.gzipwrap import GzipWrap + + +class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])): + """ + ManifestJSON represents a Manifest of any format. + """ + + +class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name'])): + """ + Repository represents a collection of tags. + """ + + +class Tag(namedtuple('Tag', ['name', 'repository'])): + """ + Tag represents a user-facing alias for referencing a set of Manifests. + """ + + +class BlobUpload(namedtuple('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count', + 'chunk_count', 'sha_state', 'location_name', + 'storage_metadata', 'piece_sha_state', 'piece_hashes'])): + """ + BlobUpload represents the current state of an Blob being uploaded. + """ + + +class Blob(namedtuple('Blob', ['digest', 'size', 'locations'])): + """ + Blob represents an opaque binary blob saved to the storage system. + """ + + +class TarImageFormatter(object): + """ + Base class for classes which produce a tar containing image and layer data. + """ + + def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator, get_image_json): + """ + Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's + implementation. + """ + return GzipWrap(self.stream_generator(namespace, repository, tag, + synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator, + get_image_json)) + + def stream_generator(self, namespace, repository, tag, synthetic_image_id, + layer_json, get_image_iterator, get_layer_iterator, get_image_json): + raise NotImplementedError + + def tar_file(self, name, contents, mtime=None): + """ + Returns the tar binary representation for a file with the given name and file contents. 
+ """ + length = len(contents) + tar_data = self.tar_file_header(name, length, mtime=mtime) + tar_data += contents + tar_data += self.tar_file_padding(length) + return tar_data + + def tar_file_padding(self, length): + """ + Returns tar file padding for file data of the given length. + """ + if length % 512 != 0: + return '\0' * (512 - (length % 512)) + + return '' + + def tar_file_header(self, name, file_size, mtime=None): + """ + Returns tar file header data for a file with the given name and size. + """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.REGTYPE + info.size = file_size + + if mtime is not None: + info.mtime = mtime + return info.tobuf() + + def tar_folder(self, name, mtime=None): + """ + Returns tar file header data for a folder with the given name. + """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.DIRTYPE + + if mtime is not None: + info.mtime = mtime + + # allow the directory to be readable by non-root users + info.mode = 0755 + return info.tobuf() diff --git a/formats/aci.py b/image/appc/__init__.py similarity index 86% rename from formats/aci.py rename to image/appc/__init__.py index c24f691bd..592825e43 100644 --- a/formats/aci.py +++ b/image/appc/__init__.py @@ -6,14 +6,15 @@ from uuid import uuid4 from app import app from util.registry.streamlayerformat import StreamLayerMerger -from formats.tarimageformatter import TarImageFormatter +from image import TarImageFormatter ACNAME_REGEX = re.compile(r'[^a-z-]+') -class ACIImage(TarImageFormatter): - """ Image formatter which produces an ACI-compatible TAR. +class AppCImageFormatter(TarImageFormatter): + """ + Image formatter which produces an tarball according to the AppC specification. """ def stream_generator(self, namespace, repository, tag, synthetic_image_id, @@ -40,7 +41,9 @@ class ACIImage(TarImageFormatter): @staticmethod def _build_isolators(docker_config): - """ Builds ACI isolator config from the docker config. """ + """ + Builds ACI isolator config from the docker config. + """ def _isolate_memory(memory): return { @@ -107,22 +110,24 @@ class ACIImage(TarImageFormatter): @staticmethod def _build_ports(docker_config): - """ Builds the ports definitions for the ACI. """ + """ + Builds the ports definitions for the ACI. 
diff --git a/formats/aci.py b/image/appc/__init__.py
similarity index 86%
rename from formats/aci.py
rename to image/appc/__init__.py
index c24f691bd..592825e43 100644
--- a/formats/aci.py
+++ b/image/appc/__init__.py
@@ -6,14 +6,15 @@ from uuid import uuid4
 
 from app import app
 from util.registry.streamlayerformat import StreamLayerMerger
-from formats.tarimageformatter import TarImageFormatter
+from image import TarImageFormatter
 
 
 ACNAME_REGEX = re.compile(r'[^a-z-]+')
 
 
-class ACIImage(TarImageFormatter):
-  """ Image formatter which produces an ACI-compatible TAR.
+class AppCImageFormatter(TarImageFormatter):
+  """
+  Image formatter which produces a tarball according to the AppC specification.
   """
 
   def stream_generator(self, namespace, repository, tag, synthetic_image_id,
@@ -40,7 +41,9 @@
   @staticmethod
   def _build_isolators(docker_config):
-    """ Builds ACI isolator config from the docker config. """
+    """
+    Builds ACI isolator config from the docker config.
+    """
 
     def _isolate_memory(memory):
       return {
@@ -107,22 +110,24 @@
   @staticmethod
   def _build_ports(docker_config):
-    """ Builds the ports definitions for the ACI. """
+    """
+    Builds the ports definitions for the ACI.
+
+    Formats:
+      port/tcp
+      port/udp
+      port
+    """
     ports = []
 
-    for docker_port_definition in ACIImage._get_docker_config_value(docker_config, 'Ports', []):
-      # Formats:
-      # port/tcp
-      # port/udp
-      # port
-
+    for docker_port in AppCImageFormatter._get_docker_config_value(docker_config, 'Ports', []):
       protocol = 'tcp'
       port_number = -1
 
-      if '/' in docker_port_definition:
-        (port_number, protocol) = docker_port_definition.split('/')
+      if '/' in docker_port:
+        (port_number, protocol) = docker_port.split('/')
       else:
-        port_number = docker_port_definition
+        port_number = docker_port
 
       try:
         port_number = int(port_number)
@@ -149,9 +154,9 @@
     volumes = []
 
     def get_name(docker_volume_path):
-      return "volume-%s" % ACIImage._ac_name(docker_volume_path)
+      return "volume-%s" % AppCImageFormatter._ac_name(docker_volume_path)
 
-    for docker_volume_path in ACIImage._get_docker_config_value(docker_config, 'Volumes', []):
+    for docker_volume_path in AppCImageFormatter._get_docker_config_value(docker_config, 'Volumes', []):
       if not docker_volume_path:
         continue
@@ -219,9 +224,9 @@
       "eventHandlers": [],
       "workingDirectory": config.get('WorkingDir', '') or '/',
       "environment": [{"name": key, "value": value} for (key, value) in env_vars],
-      "isolators": ACIImage._build_isolators(config),
-      "mountPoints": ACIImage._build_volumes(config),
-      "ports": ACIImage._build_ports(config),
+      "isolators": AppCImageFormatter._build_isolators(config),
+      "mountPoints": AppCImageFormatter._build_volumes(config),
+      "ports": AppCImageFormatter._build_ports(config),
       "annotations": [
         {"name": "created", "value": docker_layer_data.get('created', '')},
         {"name": "homepage", "value": source_url},
diff --git a/image/docker/__init__.py b/image/docker/__init__.py
new file mode 100644
index 000000000..74ceba2d7
--- /dev/null
+++ b/image/docker/__init__.py
@@ -0,0 +1,10 @@
+"""
+docker implements pure data transformations according to the many Docker specifications.
+"""
+
+class DockerException(Exception):
+  pass
+
+
+class ManifestException(DockerException):
+  pass
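The three port formats documented in _build_ports parse as follows; an illustrative standalone version of the split logic used above:

    def parse_docker_port(docker_port):
      protocol = 'tcp'
      port_number = docker_port
      if '/' in docker_port:
        (port_number, protocol) = docker_port.split('/')
      return int(port_number), protocol

    assert parse_docker_port('6379/tcp') == (6379, 'tcp')
    assert parse_docker_port('53/udp') == (53, 'udp')
    assert parse_docker_port('8080') == (8080, 'tcp')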
diff --git a/data/types.py b/image/docker/schema1.py
similarity index 55%
rename from data/types.py
rename to image/docker/schema1.py
index e93c06539..f154b7d21 100644
--- a/data/types.py
+++ b/image/docker/schema1.py
@@ -1,5 +1,11 @@
-import json
+"""
+schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification.
+
+https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md
+"""
+
 import hashlib
+import json
 import logging
 
 from collections import namedtuple, OrderedDict
@@ -9,72 +15,72 @@
 from jwkest.jws import SIGNER_ALGS, keyrep
 from jwt.utils import base64url_encode, base64url_decode
 
 from digest import digest_tools
+from image.docker import ManifestException
+from image.docker.v1 import DockerV1Metadata
 
 
 logger = logging.getLogger(__name__)
 
-DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
-DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
-DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'
+# Content Types
+DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json'
+DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
+DOCKER_SCHEMA1_CONTENT_TYPES = [DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
+                                DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE]
 
-DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
-                                DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]
+# Keys for signature-related data
+DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures'
+DOCKER_SCHEMA1_HEADER_KEY = 'header'
+DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature'
+DOCKER_SCHEMA1_PROTECTED_KEY = 'protected'
+DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength'
+DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail'
 
+# Keys for manifest-related data
+DOCKER_SCHEMA1_REPO_NAME_KEY = 'name'
+DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag'
+DOCKER_SCHEMA1_ARCH_KEY = 'architecture'
+DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers'
+DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum'
+DOCKER_SCHEMA1_HISTORY_KEY = 'history'
+DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility'
+DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion'
 
-# These are used to extract backwards compatiblity data from Docker Manifest Schema 1
-ExtractedLayerMetadata = namedtuple(
-  'ExtractedLayerMetadata',
-  ['digest', 'v1_metadata', 'v1_metadata_str']
-)
-ExtractedDockerV1Metadata = namedtuple(
-  'ExtractedDockerV1Metadata',
-  ['image_id', 'parent_image_id', 'created', 'comment', 'command']
-)
-
-
-# Constants used for Docker Manifest Schema 2.1
-_DOCKER_SCHEMA_1_SIGNATURES_KEY = 'signatures'
-_DOCKER_SCHEMA_1_PROTECTED_KEY = 'protected'
-_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY = 'formatLength'
-_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY = 'formatTail'
-_DOCKER_SCHEMA_1_REPO_NAME_KEY = 'name'
-_DOCKER_SCHEMA_1_REPO_TAG_KEY = 'tag'
-_DOCKER_SCHEMA_1_FS_LAYERS_KEY = 'fsLayers'
-_DOCKER_SCHEMA_1_HISTORY_KEY = 'history'
-_DOCKER_SCHEMA_1_BLOB_SUM_KEY = 'blobSum'
-_DOCKER_SCHEMA_1_V1_COMPAT_KEY = 'v1Compatibility'
-_DOCKER_SCHEMA_1_ARCH_KEY = 'architecture'
-_DOCKER_SCHEMA_1_SCHEMA_VER_KEY = 'schemaVersion'
+# Format for time used in the protected payload.
 _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
-_JWS_ALGORITHM = 'RS256'
+
+# The algorithm we use to sign the JWS.
+_JWS_SIGNING_ALGORITHM = 'RS256'
 
 
-class ManifestException(Exception):
+class MalformedSchema1Manifest(ManifestException):
+  """
+  Raised when a manifest fails an assertion that should be true according to the Docker Manifest
+  v2.1 Specification.
+  """
   pass
 
 
-class ManifestMalformed(ManifestException):
+class InvalidSchema1Signature(ManifestException):
+  """
+  Raised when there is a failure verifying the signature of a signed Docker 2.1 Manifest.
+  """
   pass
 
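Splitting the signed and unsigned schema1 content types lets callers accept either form. A sketch of the membership test these constants are built for (the header value is illustrative):

    DOCKER_SCHEMA1_CONTENT_TYPES = [
      'application/vnd.docker.distribution.manifest.v1+json',
      'application/vnd.docker.distribution.manifest.v1+prettyjws',
    ]

    content_type = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
    assert content_type in DOCKER_SCHEMA1_CONTENT_TYPES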
+ """ pass -class ManifestSignatureFailure(ManifestException): - pass +class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata'])): + """ + Represents all of the data about an individual layer in a given Manifest. + This is the union of the fsLayers (digest) and the history entries (v1_compatibility). + """ -def _updated_v1_metadata(v1_metadata_json, updated_id_map): - parsed = json.loads(v1_metadata_json) - parsed['id'] = updated_id_map[parsed['id']] - - if parsed.get('parent') and parsed['parent'] in updated_id_map: - parsed['parent'] = updated_id_map[parsed['parent']] - - if parsed.get('container_config', {}).get('Image'): - existing_image = parsed['container_config']['Image'] - if existing_image in updated_id_map: - parsed['container_config']['image'] = updated_id_map[existing_image] - - return json.dumps(parsed) +class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created', + 'comment', 'command'])): + """ + Represents the necessary data extracted from the v1 compatibility string in a given layer of a + Manifest. + """ class DockerSchema1Manifest(object): @@ -83,17 +89,18 @@ class DockerSchema1Manifest(object): self._bytes = manifest_bytes self._parsed = json.loads(manifest_bytes) - self._signatures = self._parsed[_DOCKER_SCHEMA_1_SIGNATURES_KEY] - self._tag = self._parsed[_DOCKER_SCHEMA_1_REPO_TAG_KEY] + self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY] + self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY] - repo_name_tuple = self._parsed[_DOCKER_SCHEMA_1_REPO_NAME_KEY].split('/') + repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY] + repo_name_tuple = repo_name.split('/') if len(repo_name_tuple) > 1: self._namespace, self._repo_name = repo_name_tuple elif len(repo_name_tuple) == 1: self._namespace = '' self._repo_name = repo_name_tuple[0] else: - raise ManifestMalformed('malformed repository name') + raise MalformedSchema1Manifest('malformed repository name: %s' % repo_name) if validate: self._validate() @@ -108,7 +115,11 @@ class DockerSchema1Manifest(object): sig = base64url_decode(signature['signature'].encode('utf-8')) verified = signer.verify(bytes_to_verify, sig, gk) if not verified: - raise ManifestSignatureFailure() + raise InvalidSchema1Signature() + + @property + def content_type(self): + return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE @property def signatures(self): @@ -151,6 +162,10 @@ class DockerSchema1Manifest(object): def checksums(self): return list({str(mdata.digest) for mdata in self.layers}) + @property + def leaf_layer(self): + return self.layers[-1] + @property def layers(self): if self._layers is None: @@ -158,38 +173,39 @@ class DockerSchema1Manifest(object): return self._layers def _generate_layers(self): - """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, - starting from the base image and working toward the leaf node. """ - for blob_sum_obj, history_obj in reversed(zip(self._parsed[_DOCKER_SCHEMA_1_FS_LAYERS_KEY], - self._parsed[_DOCKER_SCHEMA_1_HISTORY_KEY])): + Returns a generator of objects that have the blobSum and v1Compatibility keys in them, + starting from the base image and working toward the leaf node. 
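
Schema1Layer and Schema1V1Metadata above use a pattern worth calling out: subclassing namedtuple purely to attach a docstring (and, where useful, derived properties) while keeping immutable value semantics. A generic sketch with hypothetical names:

from collections import namedtuple

class Layer(namedtuple('Layer', ['digest', 'size'])):
    """
    An immutable value object; instances still behave like plain tuples.
    """
    @property
    def short_digest(self):
        # A derived attribute computed from the underlying fields.
        return self.digest[:12]

layer = Layer(digest='sha256:' + 'a' * 64, size=1024)
assert layer.short_digest == 'sha256:aaaaa'
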
+ """ + for blob_sum_obj, history_obj in reversed(zip(self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY], + self._parsed[DOCKER_SCHEMA1_HISTORY_KEY])): try: - image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY]) except digest_tools.InvalidDigestException: - raise ManifestMalformed('could not parse manifest digest: %s' % - blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + raise MalformedSchema1Manifest('could not parse manifest digest: %s' % + blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY]) - metadata_string = history_obj[_DOCKER_SCHEMA_1_V1_COMPAT_KEY] + metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY] v1_metadata = json.loads(metadata_string) command_list = v1_metadata.get('container_config', {}).get('Cmd', None) command = json.dumps(command_list) if command_list else None if not 'id' in v1_metadata: - raise ManifestMalformed('invalid manifest v1 history') + raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON') - extracted = ExtractedDockerV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), - v1_metadata.get('created'), v1_metadata.get('comment'), - command) - yield ExtractedLayerMetadata(image_digest, extracted, metadata_string) + extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'), + v1_metadata.get('created'), v1_metadata.get('comment'), + command) + yield Schema1Layer(image_digest, extracted, metadata_string) @property def payload(self): - protected = str(self._signatures[0][_DOCKER_SCHEMA_1_PROTECTED_KEY]) + protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY]) parsed_protected = json.loads(base64url_decode(protected)) - signed_content_head = self._bytes[:parsed_protected[_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY]] - signed_content_tail = base64url_decode(str(parsed_protected[_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY])) + signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]] + signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY])) return signed_content_head + signed_content_tail def rewrite_invalid_image_ids(self, images_map): @@ -205,15 +221,15 @@ class DockerSchema1Manifest(object): has_rewritten_ids = False updated_id_map = {} - for extracted_layer_metadata in self.layers: - digest_str = str(extracted_layer_metadata.digest) - extracted_v1_metadata = extracted_layer_metadata.v1_metadata + for layer in self.layers: + digest_str = str(layer.digest) + extracted_v1_metadata = layer.v1_metadata working_image_id = extracted_v1_metadata.image_id # Update our digest_history hash for the new layer data. digest_history.update(digest_str) digest_history.update("@") - digest_history.update(extracted_layer_metadata.v1_metadata_str.encode('utf-8')) + digest_history.update(layer.raw_v1_metadata.encode('utf-8')) digest_history.update("|") # Ensure that the v1 image's storage matches the V2 blob. If not, we've @@ -233,12 +249,11 @@ class DockerSchema1Manifest(object): if extracted_v1_metadata.parent_image_id is not None: parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None) if parent_image_id is None: - raise ManifestMalformed( - 'Parent not found with image ID: {0}'.format(extracted_v1_metadata.parent_image_id) - ) + raise MalformedSchema1Manifest('parent not found with image ID: %s' % + extracted_v1_metadata.parent_image_id) # Synthesize and store the v1 metadata in the db. 
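
To make the payload reconstruction above concrete: the protected header records how many bytes of the serialized manifest were signed (formatLength) plus a base64url-encoded tail that stands in for the signatures block. A standalone sketch, with a hand-rolled decoder in case jwt.utils is unavailable; manifest_bytes is assumed to be the raw byte string of the manifest:

import base64
import json

def base64url_decode(data):
    # Restore the padding that base64url encoding strips.
    padding = '=' * (-len(data) % 4)
    return base64.urlsafe_b64decode(data + padding)

def signed_payload(manifest_bytes, protected_b64):
    protected = json.loads(base64url_decode(protected_b64))
    head = manifest_bytes[:protected['formatLength']]
    tail = base64url_decode(protected['formatTail'])
    return head + tail
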
- v1_metadata_json = extracted_layer_metadata.v1_metadata_str + v1_metadata_json = layer.raw_v1_metadata if has_rewritten_ids: v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map) @@ -253,17 +268,19 @@ class DockerSchema1Manifest(object): class DockerSchema1ManifestBuilder(object): - """ Class which represents a manifest which is currently being built. """ + """ + A convenient abstraction around creating new DockerSchema1Manifests. + """ def __init__(self, namespace_name, repo_name, tag, architecture='amd64'): repo_name_key = '{0}/{1}'.format(namespace_name, repo_name) if namespace_name == '': repo_name_key = repo_name self._base_payload = { - _DOCKER_SCHEMA_1_REPO_TAG_KEY: tag, - _DOCKER_SCHEMA_1_REPO_NAME_KEY: repo_name_key, - _DOCKER_SCHEMA_1_ARCH_KEY: architecture, - _DOCKER_SCHEMA_1_SCHEMA_VER_KEY: 1, + DOCKER_SCHEMA1_REPO_TAG_KEY: tag, + DOCKER_SCHEMA1_REPO_NAME_KEY: repo_name_key, + DOCKER_SCHEMA1_ARCH_KEY: architecture, + DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1, } self._fs_layer_digests = [] @@ -271,21 +288,22 @@ class DockerSchema1ManifestBuilder(object): def add_layer(self, layer_digest, v1_json_metadata): self._fs_layer_digests.append({ - _DOCKER_SCHEMA_1_BLOB_SUM_KEY: layer_digest, + DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest, }) self._history.append({ - _DOCKER_SCHEMA_1_V1_COMPAT_KEY: v1_json_metadata, + DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata, }) return self def build(self, json_web_key): - """ Build the payload and sign it, returning a SignedManifest object. + """ + Builds a DockerSchema1Manifest object complete with signature. """ payload = OrderedDict(self._base_payload) payload.update({ - _DOCKER_SCHEMA_1_HISTORY_KEY: self._history, - _DOCKER_SCHEMA_1_FS_LAYERS_KEY: self._fs_layer_digests, + DOCKER_SCHEMA1_HISTORY_KEY: self._history, + DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests, }) payload_str = json.dumps(payload, indent=3) @@ -302,7 +320,7 @@ class DockerSchema1ManifestBuilder(object): bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str)) - signer = SIGNER_ALGS[_JWS_ALGORITHM] + signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM] signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) logger.debug('Generated signature: %s', signature) @@ -311,48 +329,31 @@ class DockerSchema1ManifestBuilder(object): if comp in public_members} signature_block = { - 'header': { - 'jwk': public_key, - 'alg': _JWS_ALGORITHM, - }, - 'signature': signature, - _DOCKER_SCHEMA_1_PROTECTED_KEY: protected, + DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM}, + DOCKER_SCHEMA1_SIGNATURE_KEY: signature, + DOCKER_SCHEMA1_PROTECTED_KEY: protected, } logger.debug('Encoded signature block: %s', json.dumps(signature_block)) - payload.update({ - _DOCKER_SCHEMA_1_SIGNATURES_KEY: [signature_block], - }) + payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]}) return DockerSchema1Manifest(json.dumps(payload, indent=3)) -Repository = namedtuple('Repository', ['id', 'name', 'namespace_name']) +def _updated_v1_metadata(v1_metadata_json, updated_id_map): + """ + Updates v1_metadata with new image IDs. 
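
On the signing side, the construction of protected sits in context lines elided from the hunk above. The prettyjws convention is to sign everything up to the final closing brace of the pretty-printed payload and carry the remainder separately; a hedged reconstruction of that logic, not a verbatim copy of the builder:

import base64
import json
from datetime import datetime

def base64url_encode(data):
    if isinstance(data, str):
        data = data.encode('utf-8')
    return base64.urlsafe_b64encode(data).rstrip(b'=').decode('ascii')

def build_protected(payload_str):
    # Sign everything up to (but not including) the final '\n}' of the
    # pretty-printed JSON; the tail travels in the protected header.
    split_point = payload_str.rfind('\n}')
    return base64url_encode(json.dumps({
        'formatLength': split_point,
        'formatTail': base64url_encode(payload_str[split_point:]),
        'time': datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'),
    }))
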
+ """ + parsed = json.loads(v1_metadata_json) + parsed['id'] = updated_id_map[parsed['id']] -Tag = namedtuple('Tag', ['name', 'repository']) + if parsed.get('parent') and parsed['parent'] in updated_id_map: + parsed['parent'] = updated_id_map[parsed['parent']] -ManifestJSON = namedtuple('ManifestJSON', ['digest', 'json']) + if parsed.get('container_config', {}).get('Image'): + existing_image = parsed['container_config']['Image'] + if existing_image in updated_id_map: + parsed['container_config']['image'] = updated_id_map[existing_image] -DockerV1Metadata = namedtuple('DockerV1Metadata', ['namespace_name', - 'repo_name', - 'image_id', - 'checksum', - 'content_checksum', - 'created', - 'comment', - 'command', - 'parent_image_id', - 'compat_json']) - -BlobUpload = namedtuple('BlobUpload', ['uuid', - 'byte_count', - 'uncompressed_byte_count', - 'chunk_count', - 'sha_state', - 'location_name', - 'storage_metadata', - 'piece_sha_state', - 'piece_hashes']) - -Blob = namedtuple('Blob', ['digest', 'size', 'locations']) + return json.dumps(parsed) diff --git a/image/docker/schema2.py b/image/docker/schema2.py new file mode 100644 index 000000000..4a69ab8bb --- /dev/null +++ b/image/docker/schema2.py @@ -0,0 +1,11 @@ +""" +schema2 implements pure data transformations according to the Docker Manifest v2.2 Specification. + +https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md +""" + +# Content Types +DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json' +DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json' +DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE] diff --git a/formats/squashed.py b/image/docker/squashed.py similarity index 85% rename from formats/squashed.py rename to image/docker/squashed.py index ba0964339..0f6628952 100644 --- a/formats/squashed.py +++ b/image/docker/squashed.py @@ -1,30 +1,31 @@ -from app import app -from util.registry.gzipwrap import GZIP_BUFFER_SIZE -from util.registry.streamlayerformat import StreamLayerMerger -from formats.tarimageformatter import TarImageFormatter - import copy import json import math import calendar +from app import app +from image import TarImageFormatter +from util.registry.gzipwrap import GZIP_BUFFER_SIZE +from util.registry.streamlayerformat import StreamLayerMerger + + class FileEstimationException(Exception): - """ Exception raised by build_docker_load_stream if the estimated size of the layer TAR - was lower than the actual size. This means the sent TAR header is wrong, and we have - to fail. + """ + Exception raised by build_docker_load_stream if the estimated size of the layer tar was lower + than the actual size. This means the sent tar header is wrong, and we have to fail. """ pass -class SquashedDockerImage(TarImageFormatter): - """ Image formatter which produces a squashed image compatible with the `docker load` - command. +class SquashedDockerImageFormatter(TarImageFormatter): + """ + Image formatter which produces a squashed image compatible with the `docker load` command. """ - # Multiplier against the image size reported by Docker to account for the TAR metadata. + # Multiplier against the image size reported by Docker to account for the tar metadata. # Note: This multiplier was not formally calculated in anyway and should be adjusted overtime # if/when we encounter issues with it. 
Unfortunately, we cannot make it too large or the Docker - # daemon dies when trying to load the entire TAR into memory. + # daemon dies when trying to load the entire tar into memory. SIZE_MULTIPLIER = 1.2 def stream_generator(self, namespace, repository, tag, synthetic_image_id, @@ -39,7 +40,7 @@ class SquashedDockerImage(TarImageFormatter): # repositories - JSON file containing a repo -> tag -> image map # {image ID folder}: # json - The layer JSON - # layer.tar - The TARed contents of the layer + # layer.tar - The tared contents of the layer # VERSION - The docker import version: '1.0' layer_merger = StreamLayerMerger(get_layer_iterator) @@ -57,7 +58,7 @@ class SquashedDockerImage(TarImageFormatter): yield self.tar_folder(synthetic_image_id, mtime=image_mtime) # Yield the JSON layer data. - layer_json = SquashedDockerImage._build_layer_json(layer_json, synthetic_image_id) + layer_json = SquashedDockerImageFormatter._build_layer_json(layer_json, synthetic_image_id) yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime) # Yield the VERSION file. @@ -73,7 +74,7 @@ class SquashedDockerImage(TarImageFormatter): estimated_file_size += image.storage.uncompressed_size else: image_json = get_image_json(image) - estimated_file_size += image_json.get('Size', 0) * SquashedDockerImage.SIZE_MULTIPLIER + estimated_file_size += image_json.get('Size', 0) * SquashedDockerImageFormatter.SIZE_MULTIPLIER # Make sure the estimated file size is an integer number of bytes. estimated_file_size = int(math.ceil(estimated_file_size)) @@ -105,7 +106,7 @@ class SquashedDockerImage(TarImageFormatter): # Yield any file padding to 512 bytes that is necessary. yield self.tar_file_padding(estimated_file_size) - # Last two records are empty in TAR spec. + # Last two records are empty in tar spec. yield '\0' * 512 yield '\0' * 512 diff --git a/image/docker/v1.py b/image/docker/v1.py new file mode 100644 index 000000000..b6df9f21a --- /dev/null +++ b/image/docker/v1.py @@ -0,0 +1,16 @@ +""" +v1 implements pure data transformations according to the Docker Image Specification v1.1. + +https://github.com/docker/docker/blob/master/image/spec/v1.1.md +""" + +from collections import namedtuple + +class DockerV1Metadata(namedtuple('DockerV1Metadata', + ['namespace_name', 'repo_name', 'image_id', 'checksum', + 'content_checksum', 'created', 'comment', 'command', + 'parent_image_id', 'compat_json'])): + """ + DockerV1Metadata represents all of the metadata for a given Docker v1 Image. + The original form of the metadata is stored in the compat_json field. 
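
The size estimation in the squashed formatter above reduces to: trust the recorded uncompressed size when present, otherwise scale Docker's self-reported size by the fudge factor, then round up to whole bytes for the tar header. A sketch using hypothetical dict-shaped image records in place of the real model objects:

import math

SIZE_MULTIPLIER = 1.2

def estimate_squashed_size(images):
    # `images` stands in for the repository's layer records: each has an
    # optional 'uncompressed_size' and a fallback self-reported 'Size'.
    estimated = 0
    for image in images:
        if image.get('uncompressed_size') is not None:
            estimated += image['uncompressed_size']
        else:
            estimated += image.get('Size', 0) * SIZE_MULTIPLIER
    # The tar header needs an integer byte count.
    return int(math.ceil(estimated))

assert estimate_squashed_size([{'uncompressed_size': 100}, {'Size': 100}]) == 220
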
+ """ From 59529569dc83ec776d299b1e4651a53edb68d907 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 3 Aug 2016 13:54:14 -0400 Subject: [PATCH 16/34] reorder imports --- util/registry/torrent.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/util/registry/torrent.py b/util/registry/torrent.py index d81caa162..ec93e1405 100644 --- a/util/registry/torrent.py +++ b/util/registry/torrent.py @@ -1,12 +1,13 @@ -import bencode import hashlib -import jwt -import resumablehashlib import time import urllib from cachetools import lru_cache +import bencode +import jwt +import resumablehashlib + from app import app, instance_keys @@ -14,6 +15,7 @@ ANNOUNCE_URL = app.config['BITTORRENT_ANNOUNCE_URL'] FILENAME_PEPPER = app.config['BITTORRENT_FILENAME_PEPPER'] REGISTRY_TITLE = app.config['REGISTRY_TITLE'] + @lru_cache(maxsize=1) def _load_private_key(private_key_file_path): with open(private_key_file_path) as private_key_file: From 56951397a9b8d42faabb03ac1f74643e2147a3e4 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 3 Aug 2016 13:55:39 -0400 Subject: [PATCH 17/34] content type lists -> sets --- image/docker/schema1.py | 4 ++-- image/docker/schema2.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/image/docker/schema1.py b/image/docker/schema1.py index f154b7d21..14e77ad83 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -25,8 +25,8 @@ logger = logging.getLogger(__name__) # Content Types DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json' DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws' -DOCKER_SCHEMA1_CONTENT_TYPES = [DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, - DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE] +DOCKER_SCHEMA1_CONTENT_TYPES = {DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE} # Keys for signature-related data DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures' diff --git a/image/docker/schema2.py b/image/docker/schema2.py index 4a69ab8bb..504f5df80 100644 --- a/image/docker/schema2.py +++ b/image/docker/schema2.py @@ -7,5 +7,5 @@ https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md # Content Types DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json' DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json' -DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, - DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE] +DOCKER_SCHEMA2_CONTENT_TYPES = {DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE} From b68e1b5efc3d6300b2db0f736dae47673966ab67 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 3 Aug 2016 14:00:40 -0400 Subject: [PATCH 18/34] add "get_" prefix to all db read funcs --- data/model/v2.py | 6 +++--- endpoints/v2/blob.py | 4 ++-- endpoints/v2/manifest.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/model/v2.py b/data/model/v2.py index 197ebebe0..12a9174ed 100644 --- a/data/model/v2.py +++ b/data/model/v2.py @@ -53,7 +53,7 @@ def delete_tag(namespace_name, repo_name, tag_name): return True -def docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): +def get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): if not repo_image: return None @@ -67,7 +67,7 @@ def docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): ) -def 
docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids): +def get_docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids): images_query = model.image.lookup_repository_images(repo, all_image_ids) return {image.docker_image_id: DockerV1Metadata(namespace_name=namespace_name, repo_name=repo_name, @@ -166,7 +166,7 @@ def create_blob_and_temp_tag(namespace_name, repo_name, expected_digest, upload_ upload_obj.uncompressed_byte_count) -def blob_by_digest(namespace_name, repo_name, digest): +def get_blob_by_digest(namespace_name, repo_name, digest): try: return model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) except model.BlobDoesNotExist: diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index bd2dc2a5b..c3eabef38 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -41,7 +41,7 @@ class _InvalidRangeHeader(Exception): @cache_control(max_age=31436000) def check_blob_exists(namespace_name, repo_name, digest): # Find the blob. - blob = v2.blob_by_digest(namespace_name, repo_name, digest) + blob = v2.get_blob_by_digest(namespace_name, repo_name, digest) if blob is None: raise BlobUnknown() @@ -70,7 +70,7 @@ def check_blob_exists(namespace_name, repo_name, digest): @cache_control(max_age=31536000) def download_blob(namespace_name, repo_name, digest): # Find the blob. - blob = v2.blob_by_digest(namespace_name, repo_name, digest) + blob = v2.get_blob_by_digest(namespace_name, repo_name, digest) if blob is None: raise BlobUnknown() diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index a5adfac89..40a4445e2 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -155,7 +155,7 @@ def _write_manifest(namespace_name, repo_name, manifest): # Lookup all the images and their parent images (if any) inside the manifest. # This will let us know which v1 images we need to synthesize and which ones are invalid. all_image_ids = list(manifest.docker_image_ids | manifest.parent_image_ids) - images_map = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) + images_map = v2.get_docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) # Rewrite any v1 image IDs that do not match the checksum in the database. try: @@ -228,7 +228,7 @@ def delete_manifest_by_digest(namespace_name, repo_name, digest): def _generate_and_store_manifest(namespace_name, repo_name, tag_name): # Find the v1 metadata for this image and its parents. - v1_metadata = v2.docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) + v1_metadata = v2.get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) parents_v1_metadata = v2.get_parents_docker_v1_metadata(namespace_name, repo_name, v1_metadata.image_id) From 35579093ca36614d3aa3e3a5293dcebbc4ef7bfd Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 3 Aug 2016 15:18:51 -0400 Subject: [PATCH 19/34] s/close_db_filter/CloseForLongOperation --- endpoints/v2/blob.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index c3eabef38..8f6a43277 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -95,16 +95,16 @@ def download_blob(namespace_name, repo_name, digest): # Close the database connection before we stream the download. logger.debug('Closing database connection before streaming layer data') - database.close_db_filter(None) + with database.CloseForLongOperation(app.config): + # Stream the response to the Docker client. 
+ return Response( + storage.stream_read(blob.locations, path), + headers=headers.update({ + 'Content-Length': blob.size, + 'Content-Type': BLOB_CONTENT_TYPE, + }), + ) - # Stream the response to the Docker client. - return Response( - storage.stream_read(blob.locations, path), - headers=headers.update({ - 'Content-Length': blob.size, - 'Content-Type': BLOB_CONTENT_TYPE, - }), - ) @v2_bp.route('//blobs/uploads/', methods=['POST']) From 16b451437f796e2f1bb98d06095eaba9991d1e57 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 9 Aug 2016 12:28:00 -0400 Subject: [PATCH 20/34] v2/blob: s/make_response/Response() --- endpoints/v2/blob.py | 91 ++++++++++++++++++++-------------------- endpoints/v2/manifest.py | 35 +++++++++------- 2 files changed, 65 insertions(+), 61 deletions(-) diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index 8f6a43277..b463816db 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -1,7 +1,7 @@ import logging import re -from flask import make_response, url_for, request, redirect, Response, abort as flask_abort +from flask import url_for, request, redirect, Response, abort as flask_abort import resumablehashlib @@ -57,9 +57,7 @@ def check_blob_exists(namespace_name, repo_name, digest): headers['Accept-Ranges'] = 'bytes' # Write the response to the Docker client. - response = make_response('') - response.headers.extend(headers) - return response + return Response(headers=headers) @v2_bp.route(BLOB_DIGEST_ROUTE, methods=['GET']) @@ -124,13 +122,15 @@ def start_blob_upload(namespace_name, repo_name): digest = request.args.get('digest', None) if digest is None: # Short-circuit because the user will send the blob data in another request. - accepted = make_response('', 202) - accepted.headers['Location'] = url_for('v2.upload_chunk', - repository='%s/%s' % (namespace_name, repo_name), - upload_uuid=new_upload_uuid) - accepted.headers['Range'] = _render_range(0) - accepted.headers['Docker-Upload-UUID'] = new_upload_uuid - return accepted + return Response( + status=202, + headers={ + 'Docker-Upload-UUID': new_upload_uuid, + 'Range': _render_range(0), + 'Location': url_for('v2.upload_chunk', repository='%s/%s' % (namespace_name, repo_name), + upload_uuid=new_upload_uuid) + }, + ) # The user plans to send us the entire body right now. # Find the upload. @@ -151,12 +151,14 @@ def start_blob_upload(namespace_name, repo_name): _finish_upload(namespace_name, repo_name, updated_blob_upload, digest) # Write the response to the docker client. 
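
One caveat with the download path shown above: dict.update() mutates in place and returns None, so headers=headers.update({...}) hands Flask no headers at all. A corrected sketch of the intended construction, using the same names as the hunk:

from flask import Response

BLOB_CONTENT_TYPE = 'application/octet-stream'

def blob_response(stream, digest, size):
    # Build the header dict first; update() cannot be used inline as a
    # constructor argument because it returns None.
    headers = {'Docker-Content-Digest': digest}
    headers.update({
        'Content-Length': size,
        'Content-Type': BLOB_CONTENT_TYPE,
    })
    return Response(stream, headers=headers)
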
- response = make_response('', 201) - response.headers['Docker-Content-Digest'] = digest - response.headers['Location'] = url_for('v2.download_blob', - repository='%s/%s' % (namespace_name, repo_name), - digest=digest) - return response + return Response( + status=201, + headers={ + 'Docker-Content-Digest': digest, + 'Location': url_for('v2.download_blob', repository='%s/%s' % (namespace_name, repo_name), + digest=digest), + }, + ) @v2_bp.route('//blobs/uploads/', methods=['GET']) @@ -169,12 +171,13 @@ def fetch_existing_upload(namespace_name, repo_name, upload_uuid): if blob_upload is None: raise BlobUploadUnknown() - accepted = make_response('', 204) - accepted.headers.extend({ - 'Docker-Upload-UUID': upload_uuid, - 'Range': _render_range(blob_upload.byte_count+1), # Docker byte ranges are exclusive - }) - return accepted + return Response( + status=204, + headers={ + 'Docker-Upload-UUID': upload_uuid, + 'Range': _render_range(blob_upload.byte_count+1), # Docker byte ranges are exclusive + }, + ) @v2_bp.route('//blobs/uploads/', methods=['PATCH']) @@ -198,13 +201,14 @@ def upload_chunk(namespace_name, repo_name, upload_uuid): v2.update_blob_upload(updated_blob_upload) # Write the response to the Docker client. - accepted = make_response('', 204) - accepted.headers.extend({ - 'Location': _current_request_path(), - 'Range': _render_range(updated_blob_upload.byte_count, with_bytes_prefix=False), - 'Docker-Upload-UUID': upload_uuid, - }) - return accepted + return Response( + status=204, + headers={ + 'Location': _current_request_path(), + 'Range': _render_range(updated_blob_upload.byte_count, with_bytes_prefix=False), + 'Docker-Upload-UUID': upload_uuid, + }, + ) @v2_bp.route('//blobs/uploads/', methods=['PUT']) @@ -233,13 +237,14 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): _finish_upload(namespace_name, repo_name, updated_blob_upload, digest) # Write the response to the Docker client. - response = make_response('', 201) - response.headers.extend({ - 'Docker-Content-Digest': digest, - 'Location': url_for('v2.download_blob', repository='%s/%s' % (namespace_name, repo_name), - digest=digest) - }) - return response + return Response( + status=201, + headers={ + 'Docker-Content-Digest': digest, + 'Location': url_for('v2.download_blob', repository='%s/%s' % (namespace_name, repo_name), + digest=digest), + } + ) @v2_bp.route('//blobs/uploads/', methods=['DELETE']) @@ -257,7 +262,7 @@ def cancel_upload(namespace_name, repo_name, upload_uuid): v2.delete_blob_upload(upload_uuid) storage.cancel_chunked_upload({upload.location_name}, upload.uuid, upload.storage_metadata) - return make_response('', 204) + return Response(status=204) @v2_bp.route('//blobs/', methods=['DELETE']) @@ -288,13 +293,9 @@ def _abort_range_not_satisfiable(valid_end, upload_uuid): TODO(jzelinskie): Unify this with the V2RegistryException class. 
""" - invalid_range = make_response('', 416) - invalid_range.headers.extend({ - 'Location': _current_request_path(), - 'Range': '0-{0}'.format(valid_end), - 'Docker-Upload-UUID': upload_uuid, - }) - flask_abort(invalid_range) + flask_abort(Response(status=416, headers={'Location': _current_request_path(), + 'Range': '0-{0}'.format(valid_end), + 'Docker-Upload-UUID': upload_uuid})) def _parse_range_header(range_header_text): diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 40a4445e2..96cb54257 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -2,7 +2,7 @@ import logging from functools import wraps -from flask import make_response, request, url_for +from flask import request, url_for, Response import features @@ -51,10 +51,11 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name): track_and_log('pull_repo', repo, analytics_name='pull_repo_100x', analytics_sample=0.01) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.bytes, 200) - response.headers['Content-Type'] = manifest.content_type - response.headers['Docker-Content-Digest'] = manifest.digest - return response + return Response( + manifest.bytes, + status=200, + headers={'Content-Type': manifest.content_type, 'Docker-Content-Digest': manifest.digest}, + ) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['GET']) @@ -73,10 +74,8 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): track_and_log('pull_repo', repo) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.json, 200) - response.headers['Content-Type'] = manifest.content_type - response.headers['Docker-Content-Digest'] = manifest.digest - return response + return Response(manifest.json, status=200, headers={'Content-Type': manifest.content_type, + 'Docker-Content-Digest': manifest.digest}) def _reject_manifest2_schema2(func): @@ -190,12 +189,16 @@ def _write_manifest(namespace_name, repo_name, manifest): spawn_notification(repo, 'repo_push', {'updated_tags': [manifest.tag]}) metric_queue.repository_push.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response('OK', 202) - response.headers['Docker-Content-Digest'] = manifest.digest - response.headers['Location'] = url_for('v2.fetch_manifest_by_digest', - repository='%s/%s' % (namespace_name, repo_name), - manifest_ref=manifest.digest) - return response + return Response( + 'OK', + status=202, + headers={ + 'Docker-Content-Digest': manifest.digest, + 'Location': url_for('v2.fetch_manifest_by_digest', + repository='%s/%s' % (namespace_name, repo_name), + manifest_ref=manifest.digest), + }, + ) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE']) @@ -223,7 +226,7 @@ def delete_manifest_by_digest(namespace_name, repo_name, digest): track_and_log('delete_tag', tag.repository, tag=tag.name, digest=digest) - return make_response('', 202) + return Response(status=202) def _generate_and_store_manifest(namespace_name, repo_name, tag_name): From 9f743fd6cdf9761f43fa713642a546eab3f56ef0 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 9 Aug 2016 15:11:35 -0400 Subject: [PATCH 21/34] address PR comments --- endpoints/v2/__init__.py | 3 +++ endpoints/v2/blob.py | 26 +++++++++++++------------- image/docker/__init__.py | 4 ++-- image/docker/squashed.py | 2 +- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py index 97ecc40e6..1e01f6416 100644 --- 
a/endpoints/v2/__init__.py +++ b/endpoints/v2/__init__.py @@ -33,6 +33,9 @@ _MAX_RESULTS_PER_PAGE = 50 def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset', callback_kwarg_name='pagination_callback'): + """ + Decorates a handler adding a parsed pagination token and a callback to encode a response token. + """ def wrapper(func): @wraps(func) def wrapped(*args, **kwargs): diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index b463816db..f04e767da 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -343,9 +343,11 @@ def _upload_chunk(blob_upload, start_offset, length): """ # Check for invalidate arguments. if None in {blob_upload, start_offset, length}: + logger.error('None provided as argument to _upload_chunk') return None if start_offset > 0 and start_offset > blob_upload.byte_count: + logger.error('start_offset provided to _upload_chunk greater than blob.upload.byte_count') return None location_set = {blob_upload.location_name} @@ -391,19 +393,17 @@ def _upload_chunk(blob_upload, start_offset, length): size_info, fn = calculate_size_handler() input_fp = wrap_with_handler(input_fp, fn) - try: - length_written, new_metadata, error = storage.stream_upload_chunk( - location_set, - blob_upload.uuid, - start_offset, - length, - input_fp, - blob_upload.storage_metadata, - content_type=BLOB_CONTENT_TYPE, - ) - if error is not None: - return None - except InvalidChunkException: + length_written, new_metadata, error = storage.stream_upload_chunk( + location_set, + blob_upload.uuid, + start_offset, + length, + input_fp, + blob_upload.storage_metadata, + content_type=BLOB_CONTENT_TYPE, + ) + if error is not None: + logger.error('storage.stream_upload_chunk returned error %s', error) return None # If we determined an uncompressed size and this is the first chunk, add it to the blob. diff --git a/image/docker/__init__.py b/image/docker/__init__.py index 74ceba2d7..f694dcb12 100644 --- a/image/docker/__init__.py +++ b/image/docker/__init__.py @@ -2,9 +2,9 @@ docker implements pure data transformations according to the many Docker specifications. """ -class DockerException(Exception): +class DockerFormatException(Exception): pass -class ManifestException(DockerException): +class ManifestException(DockerFormatException): pass diff --git a/image/docker/squashed.py b/image/docker/squashed.py index 0f6628952..d3c886185 100644 --- a/image/docker/squashed.py +++ b/image/docker/squashed.py @@ -40,7 +40,7 @@ class SquashedDockerImageFormatter(TarImageFormatter): # repositories - JSON file containing a repo -> tag -> image map # {image ID folder}: # json - The layer JSON - # layer.tar - The tared contents of the layer + # layer.tar - The tarballed contents of the layer # VERSION - The docker import version: '1.0' layer_merger = StreamLayerMerger(get_layer_iterator) From c77a7bc0b92540caf8440328378374c414a4f3df Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 9 Aug 2016 16:02:49 -0400 Subject: [PATCH 22/34] v2/blob: _upload_chunk parse range header --- endpoints/v2/blob.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index f04e767da..38ae12140 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -140,7 +140,7 @@ def start_blob_upload(namespace_name, repo_name): # Upload the chunk to storage while calculating some metadata and updating # the upload state. 
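
For reference while reading this patch: turning a header of the form bytes=M-N into (start, length), under the exclusive-end convention the comments in this file describe, reduces to a sketch like the following (the real code raises its own exception type rather than ValueError):

import re

_RANGE_HEADER = re.compile(r'^bytes=([0-9]+)-([0-9]+)$')

def parse_range_header(text):
    # Returns (start_offset, length); raises ValueError on malformed input.
    match = _RANGE_HEADER.match(text)
    if match is None:
        raise ValueError('invalid range header: %s' % text)
    start = int(match.group(1))
    length = int(match.group(2)) - start  # exclusive upper bound
    if length <= 0:
        raise ValueError('range has non-positive length')
    return (start, length)

assert parse_range_header('bytes=0-100') == (0, 100)
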
- updated_blob_upload = _upload_chunk(blob_upload, *_start_offset_and_length(request.headers)) + updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range')) if updated_blob_upload is None: _abort_range_not_satisfiable(updated_blob_upload.byte_count, new_upload_uuid) @@ -193,7 +193,7 @@ def upload_chunk(namespace_name, repo_name, upload_uuid): # Upload the chunk to storage while calculating some metadata and updating # the upload state. - updated_blob_upload = _upload_chunk(blob_upload, *_start_offset_and_length(request.headers)) + updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range')) if updated_blob_upload is None: _abort_range_not_satisfiable(updated_blob_upload.byte_count, upload_uuid) @@ -229,7 +229,7 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): # Upload the chunk to storage while calculating some metadata and updating # the upload state. - updated_blob_upload = _upload_chunk(blob_upload, *_start_offset_and_length(request.headers)) + updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range')) if updated_blob_upload is None: _abort_range_not_satisfiable(updated_blob_upload.byte_count, upload_uuid) @@ -318,14 +318,13 @@ def _parse_range_header(range_header_text): return (start, length) -def _start_offset_and_length(headers): +def _start_offset_and_length(range_header): """ Returns a tuple of the start offset and the length. If the range header doesn't exist, defaults to (0, -1). If parsing fails, returns (None, None). """ start_offset, length = 0, -1 - range_header = headers.get('range', None) if range_header is not None: try: start_offset, length = _parse_range_header(range_header) @@ -335,15 +334,16 @@ def _start_offset_and_length(headers): return start_offset, length -def _upload_chunk(blob_upload, start_offset, length): +def _upload_chunk(blob_upload, range_header): """ Calculates metadata while uploading a chunk to storage. Returns a BlobUpload object or None if there was a failure. """ - # Check for invalidate arguments. + # Get the offset and length of the current chunk. + start_offset, length = _start_offset_and_length(range_header) if None in {blob_upload, start_offset, length}: - logger.error('None provided as argument to _upload_chunk') + logger.error('Invalid arguments provided to _upload_chunk') return None if start_offset > 0 and start_offset > blob_upload.byte_count: From 419779b9c51dc5c258204f3e3045bcae25425802 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 9 Aug 2016 16:06:12 -0400 Subject: [PATCH 23/34] v2/blob: remove references to docker client --- endpoints/v2/blob.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index 38ae12140..2a18bb979 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -52,11 +52,11 @@ def check_blob_exists(namespace_name, repo_name, digest): 'Content-Type': BLOB_CONTENT_TYPE, } - # If our storage supports range requests, let the Docker client know. + # If our storage supports range requests, let the client know. if storage.get_supports_resumable_downloads(blob.locations): headers['Accept-Ranges'] = 'bytes' - # Write the response to the Docker client. + # Write the response to the client. return Response(headers=headers) @@ -75,7 +75,7 @@ def download_blob(namespace_name, repo_name, digest): # Build the response headers. headers = {'Docker-Content-Digest': digest} - # If our storage supports range requests, let the Docker client know. 
+ # If our storage supports range requests, let the client know. if storage.get_supports_resumable_downloads(blob.locations): headers['Accept-Ranges'] = 'bytes' @@ -94,7 +94,7 @@ def download_blob(namespace_name, repo_name, digest): # Close the database connection before we stream the download. logger.debug('Closing database connection before streaming layer data') with database.CloseForLongOperation(app.config): - # Stream the response to the Docker client. + # Stream the response to the client. return Response( storage.stream_read(blob.locations, path), headers=headers.update({ @@ -150,7 +150,7 @@ def start_blob_upload(namespace_name, repo_name): # Finalize the upload process in the database and storage. _finish_upload(namespace_name, repo_name, updated_blob_upload, digest) - # Write the response to the docker client. + # Write the response to the client. return Response( status=201, headers={ @@ -175,7 +175,7 @@ def fetch_existing_upload(namespace_name, repo_name, upload_uuid): status=204, headers={ 'Docker-Upload-UUID': upload_uuid, - 'Range': _render_range(blob_upload.byte_count+1), # Docker byte ranges are exclusive + 'Range': _render_range(blob_upload.byte_count+1), # byte ranges are exclusive }, ) @@ -200,7 +200,7 @@ def upload_chunk(namespace_name, repo_name, upload_uuid): # Save the upload state to the database. v2.update_blob_upload(updated_blob_upload) - # Write the response to the Docker client. + # Write the response to the client. return Response( status=204, headers={ @@ -236,7 +236,7 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): # Finalize the upload process in the database and storage. _finish_upload(namespace_name, repo_name, updated_blob_upload, digest) - # Write the response to the Docker client. + # Write the response to the client. return Response( status=201, headers={ @@ -387,7 +387,7 @@ def _upload_chunk(blob_upload, range_header): # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip the # stream so we can determine the uncompressed size. We'll throw out this data if another chunk - # comes in, but in the common case Docker only sends one chunk. + # comes in, but in the common case the docker client only sends one chunk. size_info = None if start_offset == 0 and blob_upload.chunk_count == 0: size_info, fn = calculate_size_handler() From d67991987b9cc80142b46158e5da50ae79279f0f Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Fri, 19 Aug 2016 14:00:21 -0400 Subject: [PATCH 24/34] v1: refactor index --- data/model/v1.py | 51 ++++++++++++++++++++++++ endpoints/v1/index.py | 90 +++++++++++++++++++------------------------ image/__init__.py | 3 +- 3 files changed, 93 insertions(+), 51 deletions(-) diff --git a/data/model/v1.py b/data/model/v1.py index 3dfa1e123..209edd3df 100644 --- a/data/model/v1.py +++ b/data/model/v1.py @@ -196,3 +196,54 @@ def find_image_id_by_tag(namespace_name, repo_name, tag_name): def delete_tag(namespace_name, repo_name, tag_name): """ Deletes the given tag from the given repository. 
""" model.tag.delete_tag(namespace_name, repo_name, tag_name) + + +def load_token(password): + try: + model.token.load_token_data(password) + return True + except model.InvalidTokenException: + return False + + +def verify_robot(username, password): + try: + model.user.verify_robot(username, password) + return True + except model.InvalidRobotException: + return False + + +def change_user_password(user, new_password): + model.user.change_password(user, new_password) + + +def change_user_email(user, new_email_address): + model.user.update_email(user, new_email_address) + + +def get_repository(namespace_name, repo_name): + #repo = model.repository.get_repository(namespace_name, repo_name) + return Repository() + + +def create_repository(namespace_name, repo_name, user): + #repo = model.repository.create_repository(namespace_name, repo_name, user) + pass + + +def repository_is_public(namespace_name, repo_name): + # return model.repository.repository_is_public(namespace_name, repo_name) + pass + + +def validate_oauth_token(password): + if model.oauth_access_token(password): + return True + return False + + +def get_sorted_matching_repositories(search_term, only_public, can_read, limit): + matching_repos = model.repository.get_sorted_matching_repositories(query, only_public, can_read, + limit=5) + return [Repository()] diff --git a/endpoints/v1/index.py b/endpoints/v1/index.py index 82d26837e..bb3270e68 100644 --- a/endpoints/v1/index.py +++ b/endpoints/v1/index.py @@ -6,7 +6,7 @@ from functools import wraps from flask import request, make_response, jsonify, session -from data import model +from data.model import v1 from app import authentication, userevents, metric_queue from auth.auth import process_auth, generate_signed_token from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token @@ -85,26 +85,19 @@ def create_user(): success = make_response('"Username or email already exists"', 400) if username == '$token': - try: - model.token.load_token_data(password) + if v1.load_token(password): return success - except model.InvalidTokenException: - abort(400, 'Invalid access token.', issue='invalid-access-token') + abort(400, 'Invalid access token.', issue='invalid-access-token') elif username == '$oauthtoken': - validated = model.oauth.validate_access_token(password) - if validated is not None: + if v1.validate_oauth_token(password): return success - else: - abort(400, 'Invalid oauth access token.', issue='invalid-oauth-access-token') + abort(400, 'Invalid oauth access token.', issue='invalid-oauth-access-token') elif '+' in username: - try: - model.user.verify_robot(username, password) + if v1.verify_robot(username, password): return success - except model.InvalidRobotException: - abort(400, 'Invalid robot account or password.', - issue='robot-login-failure') + abort(400, 'Invalid robot account or password.', issue='robot-login-failure') (verified, error_message) = authentication.verify_and_link_user(username, password, basic_auth=True) @@ -148,23 +141,21 @@ def get_user(): @anon_allowed def update_user(username): permission = UserAdminPermission(username) - if permission.can(): update_request = request.get_json() if 'password' in update_request: logger.debug('Updating user password') - model.user.change_password(get_authenticated_user(), update_request['password']) + v1.change_user_password(get_authenticated_user(), update_request['password']) if 'email' in update_request: logger.debug('Updating user email') - 
model.user.update_email(get_authenticated_user(), update_request['email']) + v1.change_user_email(get_authenticated_user(), update_request['email']) return jsonify({ 'username': get_authenticated_user().username, - 'email': get_authenticated_user().email, + 'email': get_authenticated_user().email }) - abort(403) @@ -179,7 +170,7 @@ def create_repository(namespace_name, repo_name): abort(400, message='Invalid repository name. Repository names cannot contain slashes.') logger.debug('Looking up repository %s/%s', namespace_name, repo_name) - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v1.get_repository(namespace_name, repo_name) logger.debug('Found repository %s/%s', namespace_name, repo_name) if not repo and get_authenticated_user() is None: @@ -189,15 +180,15 @@ def create_repository(namespace_name, repo_name): issue='no-login') elif repo: - permission = ModifyRepositoryPermission(namespace_name, repo_name) - if not permission.can(): + modify_perm = ModifyRepositoryPermission(namespace_name, repo_name) + if not modify_perm.can(): abort(403, message='You do not have permission to modify repository %(namespace)s/%(repository)s', issue='no-repo-write-permission', namespace=namespace_name, repository=repo_name) else: - permission = CreateRepositoryPermission(namespace_name) - if not permission.can(): + create_perm = CreateRepositoryPermission(namespace_name) + if not create_perm.can(): logger.info('Attempt to create a new repo %s/%s with insufficient perms', namespace_name, repo_name) msg = 'You do not have permission to create repositories in namespace "%(namespace)s"' @@ -207,7 +198,7 @@ def create_repository(namespace_name, repo_name): logger.debug('Creating repository %s/%s with owner: %s', namespace_name, repo_name, get_authenticated_user().username) - repo = model.repository.create_repository(namespace_name, repo_name, get_authenticated_user()) + v1.create_repository(namespace_name, repo_name, get_authenticated_user()) if get_authenticated_user(): user_event_data = { @@ -232,7 +223,7 @@ def update_images(namespace_name, repo_name): if permission.can(): logger.debug('Looking up repository') - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v1.get_repository(namespace_name, repo_name) if not repo: # Make sure the repo actually exists. abort(404, message='Unknown repository', issue='unknown-repo') @@ -262,10 +253,10 @@ def get_repository_images(namespace_name, repo_name): permission = ReadRepositoryPermission(namespace_name, repo_name) # TODO invalidate token? - if permission.can() or model.repository.repository_is_public(namespace_name, repo_name): + if permission.can() or v1.repository_is_public(namespace_name, repo_name): # We can't rely on permissions to tell us if a repo exists anymore logger.debug('Looking up repository') - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v1.get_repository(namespace_name, repo_name) if not repo: abort(404, message='Unknown repository', issue='unknown-repo') @@ -296,27 +287,6 @@ def put_repository_auth(namespace_name, repo_name): abort(501, 'Not Implemented', issue='not-implemented') -def conduct_repo_search(username, query, results): - """ Finds matching repositories. 
""" - def can_read(repo): - if repo.is_public: - return True - - return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can() - - only_public = username is None - matching_repos = model.repository.get_sorted_matching_repositories(query, only_public, can_read, - limit=5) - - for repo in matching_repos: - results.append({ - 'name': repo.namespace_user.username + '/' + repo.name, - 'description': repo.description, - 'is_public': repo.is_public, - 'href': '/repository/' + repo.namespace_user.username + '/' + repo.name - }) - - @v1_bp.route('/search', methods=['GET']) @process_auth @anon_protect @@ -330,7 +300,7 @@ def get_search(): results = [] if query: - conduct_repo_search(username, query, results) + _conduct_repo_search(username, query, results) data = { "query": query, @@ -341,3 +311,23 @@ def get_search(): resp = make_response(json.dumps(data), 200) resp.mimetype = 'application/json' return resp + + +def _conduct_repo_search(username, query, results): + """ Finds matching repositories. """ + def can_read(repo): + if repo.is_public: + return True + + return ReadRepositoryPermission(repo.namespace_name, repo.name).can() + + only_public = username is None + matching_repos = v1.get_sorted_matching_repositories(query, only_public, can_read, limit=5) + + for repo in matching_repos: + results.append({ + 'name': repo.namespace_name + '/' + repo.name, + 'description': repo.description, + 'is_public': repo.is_public, + 'href': '/repository/' + repo.namespace_name + '/' + repo.name + }) diff --git a/image/__init__.py b/image/__init__.py index e09f7ae72..81485dd23 100644 --- a/image/__init__.py +++ b/image/__init__.py @@ -11,7 +11,8 @@ class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])) """ -class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name'])): +class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', + 'is_public'])): """ Repository represents a collection of tags. """ From db60df827d7f0e1adfd7197bc8d9725458390e36 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 16 Aug 2016 15:23:00 -0400 Subject: [PATCH 25/34] Implement V2 interfaces and remaining V1 interfaces Also adds some tests to registry tests for V1 stuff. Note: All *registry* tests currently pass, but as verbs are not yet converted, the verb tests in registry_tests.py currently fail. 
--- data/interfaces/__init__.py | 0 data/interfaces/common.py | 12 + data/{model => interfaces}/v1.py | 60 +++-- data/interfaces/v2.py | 361 +++++++++++++++++++++++++++++++ data/model/blob.py | 6 +- data/model/tag.py | 10 + data/model/v2.py | 183 ---------------- endpoints/notificationhelper.py | 9 +- endpoints/v1/index.py | 7 +- endpoints/v1/registry.py | 2 +- endpoints/v1/tag.py | 2 +- endpoints/v2/__init__.py | 2 +- endpoints/v2/blob.py | 38 ++-- endpoints/v2/catalog.py | 5 +- endpoints/v2/manifest.py | 54 ++--- endpoints/v2/tag.py | 1 + endpoints/v2/v2auth.py | 1 + image/__init__.py | 15 +- image/docker/schema1.py | 27 ++- requirements-nover.txt | 107 ++++----- test/registry_tests.py | 24 +- 21 files changed, 588 insertions(+), 338 deletions(-) create mode 100644 data/interfaces/__init__.py create mode 100644 data/interfaces/common.py rename data/{model => interfaces}/v1.py (82%) create mode 100644 data/interfaces/v2.py delete mode 100644 data/model/v2.py diff --git a/data/interfaces/__init__.py b/data/interfaces/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/data/interfaces/common.py b/data/interfaces/common.py new file mode 100644 index 000000000..f0812515c --- /dev/null +++ b/data/interfaces/common.py @@ -0,0 +1,12 @@ +from image import Repository +from data import model + +def repository_for_repo(repo): + """ Returns a Repository object representing the repo data model instance given. """ + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) diff --git a/data/model/v1.py b/data/interfaces/v1.py similarity index 82% rename from data/model/v1.py rename to data/interfaces/v1.py index 209edd3df..cbffaa3ef 100644 --- a/data/model/v1.py +++ b/data/interfaces/v1.py @@ -2,6 +2,7 @@ from app import app, storage as store from data import model from data.model import db_transaction from util.morecollections import AttrDict +from data.interfaces.common import repository_for_repo def placement_locations_docker_v1(namespace_name, repo_name, image_id): """ Returns all the placements for the image with the given V1 Docker ID, found under the @@ -198,52 +199,65 @@ def delete_tag(namespace_name, repo_name, tag_name): model.tag.delete_tag(namespace_name, repo_name, tag_name) -def load_token(password): +def load_token(token): + """ Loads the data associated with the given (deprecated) access token, and, if found + returns True. + """ try: - model.token.load_token_data(password) + model.token.load_token_data(token) return True except model.InvalidTokenException: return False -def verify_robot(username, password): +def verify_robot(username, token): + """ Returns True if the given robot username and token match an existing robot + account. + """ try: - model.user.verify_robot(username, password) - return True + return bool(model.user.verify_robot(username, token)) except model.InvalidRobotException: return False def change_user_password(user, new_password): + """ Changes the password associated with the given user. """ model.user.change_password(user, new_password) -def change_user_email(user, new_email_address): - model.user.update_email(user, new_email_address) - - def get_repository(namespace_name, repo_name): - #repo = model.repository.get_repository(namespace_name, repo_name) - return Repository() + """ Returns the repository with the given name under the given namespace or None if none. 
""" + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + + return repository_for_repo(repo) -def create_repository(namespace_name, repo_name, user): - #repo = model.repository.create_repository(namespace_name, repo_name, user) - pass +def create_repository(namespace_name, repo_name, user=None): + """ Creates a new repository under the given namespace with the given name, for the given user. + """ + model.repository.create_repository(namespace_name, repo_name, user) def repository_is_public(namespace_name, repo_name): - # return model.repository.repository_is_public(namespace_name, repo_name) - pass + """ Returns whether the repository with the given name under the given namespace is public. + If no matching repository was found, returns False. + """ + return model.repository.repository_is_public(namespace_name, repo_name) -def validate_oauth_token(password): - if model.oauth_access_token(password): - return True - return False +def validate_oauth_token(token): + """ Returns whether the given OAuth token validates. """ + return bool(model.oauth.validate_access_token(token)) def get_sorted_matching_repositories(search_term, only_public, can_read, limit): - matching_repos = model.repository.get_sorted_matching_repositories(query, only_public, can_read, - limit=5) - return [Repository()] + """ Returns a sorted list of repositories matching the given search term. can_read is a callback + that will be invoked for each repository found, to filter results to only those visible to + the current user (if any). + """ + repos = model.repository.get_sorted_matching_repositories(search_term, only_public, can_read, + limit=limit) + + return [repository_for_repo(repo) for repo in repos] diff --git a/data/interfaces/v2.py b/data/interfaces/v2.py new file mode 100644 index 000000000..ff3e151dd --- /dev/null +++ b/data/interfaces/v2.py @@ -0,0 +1,361 @@ +from peewee import IntegrityError + +from data import model, database +from data.model import DataModelException +from image import Blob, BlobUpload, ManifestJSON, RepositoryReference, Tag +from image.docker.v1 import DockerV1Metadata +from data.interfaces.common import repository_for_repo + +_MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws" + +def create_repository(namespace_name, repo_name, creating_user=None): + """ Creates a new repository under the specified namespace with the given name. The user supplied + is the user creating the repository, if any. + """ + return model.repository.create_repository(namespace_name, repo_name, creating_user) + + +def repository_is_public(namespace_name, repo_name): + """ Returns true if the repository with the given name under the given namespace has public + visibility. + """ + return model.repository.repository_is_public(namespace_name, repo_name) + + + +def get_repository(namespace_name, repo_name): + """ Returns a repository tuple for the repository with the given name under the given namespace. + Returns None if no such repository was found. + """ + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + + return repository_for_repo(repo) + + +def has_active_tag(namespace_name, repo_name, tag_name): + """ Returns whether there is an active tag for the tag with the given name under the matching + repository, if any, or None if none. 
+ """ + try: + model.tag.get_active_tag(namespace_name, repo_name, tag_name) + return True + except database.RepositoryTag.DoesNotExist: + return False + + +def get_manifest_by_tag(namespace_name, repo_name, tag_name): + """ Returns the current manifest for the tag with the given name under the matching repository, + if any, or None if none. + """ + try: + manifest = model.tag.load_tag_manifest(namespace_name, repo_name, tag_name) + return ManifestJSON(digest=manifest.digest, json=manifest.json_data, media_type=_MEDIA_TYPE) + except model.InvalidManifestException: + return None + + +def get_manifest_by_digest(namespace_name, repo_name, digest): + """ Returns the manifest matching the given digest under the matching repository, if any, + or None if none. + """ + try: + manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, digest) + return ManifestJSON(digest=digest, json=manifest.json_data, media_type=_MEDIA_TYPE) + except model.InvalidManifestException: + return None + + +def delete_manifest_by_digest(namespace_name, repo_name, digest): + """ Deletes the manifest with the associated digest (if any) and returns all removed tags + that pointed to that manifest. If the manifest was not found, returns an empty list. + """ + tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, digest) + + def _tag_view(tag): + return Tag( + name=tag.name, + repository=RepositoryReference( + id=tag.repository_id, + name=repo_name, + namespace_name=namespace_name, + ) + ) + + return [_tag_view(tag) for tag in tags] + + +def _docker_v1_metadata(namespace_name, repo_name, repo_image): + """ Returns a DockerV1Metadata object for the given image under the repository with the given + namespace and name. Note that the namespace and name are passed here as an optimization, + and are *not checked* against the image. + """ + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=repo_image.docker_image_id, + checksum=repo_image.v1_checksum, + content_checksum=repo_image.storage.content_checksum, + compat_json=repo_image.v1_json_metadata, + + created=repo_image.created, + comment=repo_image.comment, + command=repo_image.command, + parent_image_id=None, # TODO: make sure this isn't needed anywhere, as it is expensive to lookup + ) + + +def get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): + """ Returns the Docker V1 metadata associated with the tag with the given name under the + matching repository, if any. If none, returns None. + """ + try: + repo_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) + return _docker_v1_metadata(namespace_name, repo_name, repo_image) + except DataModelException: + return None + + + +def get_docker_v1_metadata_by_image_id(namespace_name, repo_name, docker_image_ids): + """ Returns a map of Docker V1 metadata for each given image ID, matched under the repository + with the given namespace and name. Returns an empty map if the matching repository was not + found. 
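+      A usage sketch (the Docker V1 image IDs are hypothetical):
+
+        id_map = get_docker_v1_metadata_by_image_id('devtable', 'simple', ['abc123', 'def456'])
+        metadata = id_map.get('abc123')  # a DockerV1Metadata, or missing if unknown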
+ """ + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return {} + + images_query = model.image.lookup_repository_images(repo, docker_image_ids) + return {image.docker_image_id: _docker_v1_metadata(namespace_name, repo_name, image) + for image in images_query} + + +def get_parents_docker_v1_metadata(namespace_name, repo_name, docker_image_id): + """ Returns an ordered list containing the Docker V1 metadata for each parent of the image + with the given docker ID under the matching repository. Returns an empty list if the image + was not found. + """ + repo_image = model.image.get_repo_image(namespace_name, repo_name, docker_image_id) + if repo_image is None: + return [] + + parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) + return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents] + + +def create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest_digest, + manifest_bytes): + """ Creates a new manifest with the given digest and byte data, and assigns the tag with the + given name under the matching repository to it. + """ + try: + model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, + manifest_digest, manifest_bytes) + except IntegrityError: + # It's already there! + pass + + +def synthesize_v1_image(repository, storage, image_id, created, comment, command, compat_json, + parent_image_id): + """ Synthesizes a V1 image under the specified repository, pointing to the given storage + and returns the V1 metadata for the synthesized image. + """ + repo = model.repository.get_repository(repository.namespace_name, repository.name) + if repo is None: + raise DataModelException('Unknown repository: %s/%s' % (repository.namespace_name, + repository.name)) + + parent_image = None + if parent_image_id is not None: + parent_image = model.image.get_image(repo, parent_image_id) + if parent_image is None: + raise DataModelException('Unknown parent image: %s' % parent_image_id) + + storage_obj = model.storage.get_storage_by_uuid(storage.uuid) + if storage_obj is None: + raise DataModelException('Unknown storage: %s' % storage.uuid) + + repo_image = model.image.synthesize_v1_image(repo, storage_obj, image_id, created, comment, + command, compat_json, parent_image) + return _docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image) + + +def save_manifest(namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest, + manifest_bytes): + """ Saves a manifest pointing to the given leaf image, with the given manifest, under the matching + repository as a tag with the given name. + """ + model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_docker_id, + manifest_digest, manifest_bytes) + + +def repository_tags(namespace_name, repo_name, limit, offset): + """ Returns the active tags under the repository with the given name and namespace. """ + tags_query = model.tag.list_repository_tags(namespace_name, repo_name) + tags_query = tags_query.limit(limit).offset(offset) + + def _tag_view(tag): + return Tag( + name=tag.name, + repository=RepositoryReference( + id=tag.repository_id, + name=repo_name, + namespace_name=namespace_name, + ) + ) + + return [_tag_view(tag) for tag in tags_query] + + +def get_visible_repositories(username, limit, offset): + """ Returns the repositories visible to the user with the given username, if any. 
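+      A usage sketch (paging values are arbitrary); passing username=None yields only
+      public repositories:
+
+        repos = get_visible_repositories('devtable', limit=50, offset=0)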
""" + query = model.repository.get_visible_repositories(username, include_public=(username is None)) + query = query.limit(limit).offset(offset) + return [repository_for_repo(repo) for repo in query] + + +def create_blob_upload(namespace_name, repo_name, upload_uuid, location_name, storage_metadata): + """ Creates a blob upload under the matching repository with the given UUID and metadata. + Returns whether the matching repository exists. + """ + try: + model.blob.initiate_upload(namespace_name, repo_name, upload_uuid, location_name, + storage_metadata) + return True + except database.Repository.DoesNotExist: + return False + + +def blob_upload_by_uuid(namespace_name, repo_name, upload_uuid): + """ Searches for a blob upload with the given UUID under the given repository and returns it + or None if none. + """ + try: + found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) + except model.InvalidBlobUpload: + return None + + return BlobUpload( + repo_namespace_name=namespace_name, + repo_name=repo_name, + uuid=upload_uuid, + byte_count=found.byte_count, + uncompressed_byte_count=found.uncompressed_byte_count, + chunk_count=found.chunk_count, + sha_state=found.sha_state, + piece_sha_state=found.piece_sha_state, + piece_hashes=found.piece_hashes, + location_name=found.location.name, + storage_metadata=found.storage_metadata, + ) + + +def update_blob_upload(blob_upload): + """ Saves any changes to the blob upload object given to the backing data store. + + Fields that can change: + - uncompressed_byte_count + - piece_hashes + - piece_sha_state + - storage_metadata + - byte_count + - chunk_count + - sha_state + """ + # Lookup the blob upload object. + try: + blob_upload_record = model.blob.get_blob_upload(blob_upload.repo_namespace_name, + blob_upload.repo_name, blob_upload.uuid) + except model.InvalidBlobUpload: + return + + blob_upload_record.uncompressed_byte_count = blob_upload.uncompressed_byte_count + blob_upload_record.piece_hashes = blob_upload.piece_hashes + blob_upload_record.piece_sha_state = blob_upload.piece_sha_state + blob_upload_record.storage_metadata = blob_upload.storage_metadata + blob_upload_record.byte_count = blob_upload.byte_count + blob_upload_record.chunk_count = blob_upload.chunk_count + blob_upload_record.sha_state = blob_upload.sha_state + blob_upload_record.save() + + +def delete_blob_upload(namespace_name, repo_name, uuid): + """ Deletes the blob upload with the given uuid under the matching repository. If none, does + nothing. + """ + try: + found = model.blob.get_blob_upload(namespace_name, repo_name, uuid) + except model.InvalidBlobUpload: + return + + found.delete_instance() + + +def create_blob_and_temp_tag(namespace_name, repo_name, blob_digest, blob_upload, expiration_sec): + """ Crates a blob and links a temporary tag with the specified expiration to it under the + matching repository. + """ + location_obj = model.storage.get_image_location_for_name(blob_upload.location_name) + blob_record = model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, + blob_digest, + location_obj.id, + blob_upload.byte_count, + expiration_sec, + blob_upload.uncompressed_byte_count) + return Blob( + uuid=blob_record.uuid, + digest=blob_digest, + size=blob_upload.byte_count, + locations=[blob_upload.location_name], + ) + + +def lookup_blobs_by_digest(namespace_name, repo_name, digests): + """ Returns all the blobs with matching digests found under the matching repository. If the + repository doesn't exist, returns {}. 
+ """ + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return {} + + def _blob_view(blob_record): + return Blob( + uuid=blob_record.uuid, + digest=blob_record.content_checksum, + size=blob_record.image_size, + locations=None, # Note: Locations is None in this case. + ) + + query = model.storage.lookup_repo_storages_by_content_checksum(repo, digests) + return {storage.content_checksum: _blob_view(storage) for storage in query} + + +def get_blob_by_digest(namespace_name, repo_name, digest): + """ Returns the blob with the given digest under the matching repository or None if none. """ + try: + blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) + return Blob( + uuid=blob_record.uuid, + digest=digest, + size=blob_record.image_size, + locations=blob_record.locations, + ) + except model.BlobDoesNotExist: + return None + + +def save_bittorrent_pieces(blob, piece_size, piece_bytes): + """ Saves the BitTorrent piece hashes for the given blob. """ + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + model.storage.save_torrent_info(blob_record, piece_size, piece_bytes) + + +def get_blob_path(blob): + # Once everything is moved over, this could be in util.registry and not even + # touch the database. + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + return model.storage.get_layer_path(blob_record) + diff --git a/data/model/blob.py b/data/model/blob.py index 2584d96f2..470bb519d 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -3,7 +3,7 @@ from uuid import uuid4 from data.model import (tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction, storage as storage_model, InvalidImageException) from data.database import (Repository, Namespace, ImageStorage, Image, ImageStoragePlacement, - BlobUpload) + BlobUpload, ImageStorageLocation) def get_repo_blob_by_digest(namespace, repo_name, blob_digest): @@ -63,7 +63,9 @@ def get_blob_upload(namespace, repo_name, upload_uuid): """ try: return (BlobUpload - .select() + .select(BlobUpload, ImageStorageLocation) + .join(ImageStorageLocation) + .switch(BlobUpload) .join(Repository) .join(Namespace, on=(Namespace.id == Repository.namespace_user)) .where(Repository.name == repo_name, Namespace.username == namespace, diff --git a/data/model/tag.py b/data/model/tag.py index 6139b5fff..10537d071 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -333,6 +333,16 @@ def load_tag_manifest(namespace, repo_name, tag_name): raise InvalidManifestException(msg) +def delete_manifest_by_digest(namespace, repo_name, digest): + tag_manifests = list(_load_repo_manifests(namespace, repo_name) + .where(TagManifest.digest == digest)) + + for tag_manifest in tag_manifests: + delete_tag(namespace, repo_name, tag_manifest.tag.name) + + return [tag_manifest.tag for tag_manifest in tag_manifests] + + def load_manifest_by_digest(namespace, repo_name, digest): try: return (_load_repo_manifests(namespace, repo_name) diff --git a/data/model/v2.py b/data/model/v2.py deleted file mode 100644 index 12a9174ed..000000000 --- a/data/model/v2.py +++ /dev/null @@ -1,183 +0,0 @@ -from image import Blob, BlobUpload, ManifestJSON, Repository, Tag -from image.docker.v1 import DockerV1Metadata - -def create_repository(namespace_name, repo_name, user): - model.repository.create_repository(namespace, reponame, user) - - -def repository_is_public(namespace_name, repo_name): - model.repository.repository_is_public(namespace, reponame)): - - -def get_repository(namespace_name, repo_name): - 
repo = model.repository.get_repository(namespace_name, repo_name) - if repo is None: - return None - - return Repository( - id=repo.id, - name=repo.name, - namespace_name=repo.namespace_user.username, - ) - - -def get_active_tag(namespace_name, repo_name, tag_name): - try: - return model.tag.get_active_tag(namespace_name, repo_name, tag_name) - except RepositoryTag.DoesNotExist: - return None - - -def get_manifest_by_tag(namespace_name, repo_name, tag_name): - try: - manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref) - return ManifestJSON(digest=digest, json=manifest.json_data) - except model.InvalidManifestException: - return None - - -def get_manifest_by_digest(namespace_name, repo_name, digest): - try: - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) - return ManifestJSON(digest=digest, json=manifest.json_data) - except model.InvalidManifestException: - return None - - -def get_tag_by_manifest_digest(namespace_name, repo_name, digest): - return Tag() - - -def delete_tag(namespace_name, repo_name, tag_name): - model.tag.delete_tag(namespace_name, repo_name, tag.name) - return True - - -def get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): - if not repo_image: - return None - - return DockerV1Metadata( - namespace_name=namespace_name, - repo_name=repo_name, - image_id=image_id, - checksum=repo_image.v1_checksum, - content_checksum=repo_image.content_checksum, - compat_json=repo_image.v1_json_metadata, - ) - - -def get_docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids): - images_query = model.image.lookup_repository_images(repo, all_image_ids) - return {image.docker_image_id: DockerV1Metadata(namespace_name=namespace_name, - repo_name=repo_name, - image_id=image.docker_image_id, - checksum=image.v1_checksum, - content_checksum=image.content_checksum, - compat_json=image.v1_json_metadata) - for image in images_query} - - -def get_parents_docker_v1_metadata(namespace_name, repo_name, image_id): - # Old implementation: - # parents = model.image.get_parent_images(namespace_name, repo_name, image) - - # desired: - # return a list of the AttrDict in docker_v1_metadata - return [] - - -def create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest_digest, manifest_bytes): - try: - model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, - manifest.digest, manifest.bytes) - except IntegrityError: - # It's already there! - pass - - -def synthesize_v1_image(repo, storage, image_id, created, comment, command, compat_json, parent_image_id): - model.image.synthesize_v1_image(repo, storage, image_id, created, comment, command, compat_json, parent_image_id) - - -def save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, manifest_bytes): - model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, - manifest_bytes) - - -def repository_tags(namespace_name, repo_name, limit, offset): - return [Tag()] - - -def get_visible_repositories(username, limit, offset): - return [Repository()] - - -def create_blob_upload(namespace_name, repo_name, upload_uuid, location_name, storage_metadata): - """ - Creates a blob upload. - - Returns False if the upload's repository does not exist. 
- """ - - try: - model.blob.initiate_upload(namespace_name, repo_name, new_upload_uuid, location_name, - upload_metadata) - return True - except database.Repository.DoesNotExist: - return False - - -def blob_upload_by_uuid(uuid): - try: - found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) - except model.InvalidBlobUpload: - raise BlobUploadUnknown() - - return BlobUpload( - uuid=uuid, - byte_count=found.byte_count, - uncompressed_byte_count=found.uncompressed_byte_count, - chunk_count=found.chunk_count, - location_name=found.location.name, - storage_metadata=found.storage_metadata, - ) - - -def update_blob_upload(blob_upload): - # old implementation: - # blob_upload.save() - pass - - -def delete_blob_upload(uuid): - try: - found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) - except model.InvalidBlobUpload: - raise BlobUploadUnknown() - - found.delete_instance() - -def create_blob_and_temp_tag(namespace_name, repo_name, expected_digest, upload_obj): - return model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, expected_digest, - upload_obj.location, - upload_obj.byte_count, - app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'], - upload_obj.uncompressed_byte_count) - - -def get_blob_by_digest(namespace_name, repo_name, digest): - try: - return model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) - except model.BlobDoesNotExist: - return None - - -def create_bittorrent_pieces(blob_storage, piece_size, piece_bytes) - model.storage.save_torrent_info(blob_storage.id, piece_size, piece_bytes) - - -def get_blob_path(blob): - # Once everything is moved over, this could be in util.registry and not even - # touch the database. - model.storage.get_layer_path(blob) diff --git a/endpoints/notificationhelper.py b/endpoints/notificationhelper.py index b5a71c574..8640d741d 100644 --- a/endpoints/notificationhelper.py +++ b/endpoints/notificationhelper.py @@ -5,7 +5,7 @@ from auth.auth_context import get_authenticated_user, get_validated_oauth_token import json def build_event_data(repo, extra_data={}, subpage=None): - repo_string = '%s/%s' % (repo.namespace_user.username, repo.name) + repo_string = '%s/%s' % (repo.namespace_name, repo.name) homepage = '%s://%s/repository/%s' % (app.config['PREFERRED_URL_SCHEME'], app.config['SERVER_HOSTNAME'], repo_string) @@ -18,11 +18,10 @@ def build_event_data(repo, extra_data={}, subpage=None): event_data = { 'repository': repo_string, - 'namespace': repo.namespace_user.username, + 'namespace': repo.namespace_name, 'name': repo.name, 'docker_url': '%s/%s' % (app.config['SERVER_HOSTNAME'], repo_string), 'homepage': homepage, - 'visibility': repo.visibility.name } event_data.update(extra_data) @@ -54,10 +53,10 @@ def spawn_notification(repo, event_name, extra_data={}, subpage=None, pathargs=[ performer_data=None): event_data = build_event_data(repo, extra_data=extra_data, subpage=subpage) - notifications = model.notification.list_repo_notifications(repo.namespace_user.username, + notifications = model.notification.list_repo_notifications(repo.namespace_name, repo.name, event_name=event_name) for notification in list(notifications): notification_data = build_notification_data(notification, event_data, performer_data) - path = [repo.namespace_user.username, repo.name, event_name] + pathargs + path = [repo.namespace_name, repo.name, event_name] + pathargs notification_queue.put(path, json.dumps(notification_data)) diff --git a/endpoints/v1/index.py b/endpoints/v1/index.py index bb3270e68..250731a11 
100644 --- a/endpoints/v1/index.py +++ b/endpoints/v1/index.py @@ -6,8 +6,9 @@ from functools import wraps from flask import request, make_response, jsonify, session -from data.model import v1 +from data.interfaces import v1 from app import authentication, userevents, metric_queue +from app import authentication, userevents from auth.auth import process_auth, generate_signed_token from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token from auth.permissions import (ModifyRepositoryPermission, UserAdminPermission, @@ -148,10 +149,6 @@ def update_user(username): logger.debug('Updating user password') v1.change_user_password(get_authenticated_user(), update_request['password']) - if 'email' in update_request: - logger.debug('Updating user email') - v1.change_user_email(get_authenticated_user(), update_request['email']) - return jsonify({ 'username': get_authenticated_user().username, 'email': get_authenticated_user().email diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 6132c44b8..1a4ca4fc3 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -14,7 +14,7 @@ from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from auth.registry_jwt_auth import get_granted_username from data import model, database -from data.model import v1 +from data.interfaces import v1 from digest import checksums from endpoints.v1 import v1_bp from endpoints.decorators import anon_protect diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index b7830215e..822e55cb1 100644 --- a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -9,7 +9,7 @@ from auth.auth import process_auth from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from data import model -from data.model import v1 +from data.interfaces import v1 from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v1 import v1_bp diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py index 1e01f6416..07e325df9 100644 --- a/endpoints/v2/__init__.py +++ b/endpoints/v2/__init__.py @@ -65,7 +65,7 @@ def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset', kwargs[limit_kwarg_name] = limit kwargs[offset_kwarg_name] = offset kwargs[callback_kwarg_name] = callback - func(*args, **kwargs) + return func(*args, **kwargs) return wrapped return wrapper diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index 2a18bb979..504a7d83e 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -8,6 +8,7 @@ import resumablehashlib from app import storage, app from auth.registry_jwt_auth import process_registry_jwt_auth from data import database +from data.interfaces import v2 from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream @@ -134,7 +135,7 @@ def start_blob_upload(namespace_name, repo_name): # The user plans to send us the entire body right now. # Find the upload. - blob_upload = v2.blob_upload_by_uuid(new_upload_uuid) + blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, new_upload_uuid) if blob_upload is None: raise BlobUploadUnknown() @@ -142,7 +143,7 @@ def start_blob_upload(namespace_name, repo_name): # the upload state. 
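 # Note: _upload_chunk returns None on failure, so the aborts below must report the
 # byte count from the original blob_upload rather than from the (None) updated object.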
updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range')) if updated_blob_upload is None: - _abort_range_not_satisfiable(updated_blob_upload.byte_count, new_upload_uuid) + _abort_range_not_satisfiable(blob_upload.byte_count, new_upload_uuid) # Save the upload state to the database. v2.update_blob_upload(updated_blob_upload) @@ -195,7 +196,7 @@ def upload_chunk(namespace_name, repo_name, upload_uuid): # the upload state. updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range')) if updated_blob_upload is None: - _abort_range_not_satisfiable(updated_blob_upload.byte_count, upload_uuid) + _abort_range_not_satisfiable(blob_upload.byte_count, upload_uuid) # Save the upload state to the database. v2.update_blob_upload(updated_blob_upload) @@ -231,7 +232,7 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): # the upload state. updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range')) if updated_blob_upload is None: - _abort_range_not_satisfiable(updated_blob_upload.byte_count, upload_uuid) + _abort_range_not_satisfiable(blob_upload.byte_count, upload_uuid) # Finalize the upload process in the database and storage. _finish_upload(namespace_name, repo_name, updated_blob_upload, digest) @@ -253,14 +254,15 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): @require_repo_write @anon_protect def cancel_upload(namespace_name, repo_name, upload_uuid): - upload = v2.blob_upload_by_uuid(upload_uuid) - if upload is None: + blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) + if blob_upload is None: raise BlobUploadUnknown() # We delete the record for the upload first, since if the partial upload in - # storage fails to delete, it doesn't break anything - v2.delete_blob_upload(upload_uuid) - storage.cancel_chunked_upload({upload.location_name}, upload.uuid, upload.storage_metadata) + # storage fails to delete, it doesn't break anything. + v2.delete_blob_upload(namespace_name, repo_name, upload_uuid) + storage.cancel_chunked_upload({blob_upload.location_name}, blob_upload.uuid, + blob_upload.storage_metadata) return Response(status=204) @@ -342,7 +344,7 @@ def _upload_chunk(blob_upload, range_header): """ # Get the offset and length of the current chunk. start_offset, length = _start_offset_and_length(range_header) - if None in {blob_upload, start_offset, length}: + if blob_upload is None or None in {start_offset, length}: logger.error('Invalid arguments provided to _upload_chunk') return None @@ -393,7 +395,7 @@ def _upload_chunk(blob_upload, range_header): size_info, fn = calculate_size_handler() input_fp = wrap_with_handler(input_fp, fn) - length_written, new_metadata, error = storage.stream_upload_chunk( + length_written, new_metadata, upload_error = storage.stream_upload_chunk( location_set, blob_upload.uuid, start_offset, @@ -402,8 +404,9 @@ def _upload_chunk(blob_upload, range_header): blob_upload.storage_metadata, content_type=BLOB_CONTENT_TYPE, ) - if error is not None: - logger.error('storage.stream_upload_chunk returned error %s', error) + + if upload_error is not None: + logger.error('storage.stream_upload_chunk returned error %s', upload_error) return None # If we determined an uncompressed size and this is the first chunk, add it to the blob. 
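 # Sketch of the overall chunk flow implemented above (names as in this file):
 #   upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
 #   updated = _upload_chunk(upload, request.headers.get('range'))
 #   if updated is None:
 #     _abort_range_not_satisfiable(upload.byte_count, upload_uuid)
 #   v2.update_blob_upload(updated)  # persists byte/chunk counts, hash state, metadata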
@@ -418,6 +421,7 @@ def _upload_chunk(blob_upload, range_header): if piece_hasher is not None: blob_upload.piece_hashes = piece_hasher.piece_hashes blob_upload.piece_sha_state = piece_hasher.hash_fragment + blob_upload.storage_metadata = new_metadata blob_upload.byte_count += length_written blob_upload.chunk_count += 1 @@ -471,19 +475,17 @@ def _finalize_blob_database(namespace_name, repo_name, blob_upload, digest, alre namespace_name, repo_name, digest, - blob_upload.location_name, - blob_upload.byte_count, - blob_upload.uncompressed_byte_count, + blob_upload, app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'], ) # If it doesn't already exist, create the BitTorrent pieces for the blob. if blob_upload.piece_sha_state is not None and not already_existed: piece_bytes = blob_upload.piece_hashes + blob_upload.piece_sha_state.digest() - v2.create_bittorrent_pieces(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes) + v2.save_bittorrent_pieces(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes) # Delete the blob upload. - v2.delete_upload(blob_upload.uuid) + v2.delete_blob_upload(namespace_name, repo_name, blob_upload.uuid) def _finish_upload(namespace_name, repo_name, blob_upload, digest): diff --git a/endpoints/v2/catalog.py b/endpoints/v2/catalog.py index 34b195dbc..9586c7909 100644 --- a/endpoints/v2/catalog.py +++ b/endpoints/v2/catalog.py @@ -3,6 +3,7 @@ from flask import jsonify from auth.registry_jwt_auth import process_registry_jwt_auth, get_granted_entity from endpoints.decorators import anon_protect from endpoints.v2 import v2_bp, paginate +from data.interfaces import v2 @v2_bp.route('/_catalog', methods=['GET']) @process_registry_jwt_auth() @@ -14,10 +15,10 @@ def catalog_search(limit, offset, pagination_callback): if entity: username = entity.user.username - visible_repositories = v2.get_visible_repositories(username, limit, offset) + visible_repositories = v2.get_visible_repositories(username, limit+1, offset) response = jsonify({ 'repositories': ['%s/%s' % (repo.namespace_name, repo.name) - for repo in visible_repositories], + for repo in visible_repositories][0:limit], }) pagination_callback(len(visible_repositories), response) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 96cb54257..0fdd5cb51 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -9,6 +9,7 @@ import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth from data import model +from data.interfaces import v2 from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect @@ -35,14 +36,14 @@ MANIFEST_TAGNAME_ROUTE = BASE_MANIFEST_ROUTE.format(VALID_TAG_PATTERN) @process_registry_jwt_auth(scopes=['pull']) @require_repo_read @anon_protect -def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name): - manifest = v2.get_manifest_by_tag(namespace_name, repo_name, tag_name) +def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): + manifest = v2.get_manifest_by_tag(namespace_name, repo_name, manifest_ref) if manifest is None: - tag = v2.get_active_tag(namespace_name, repo_name, tag_name) - if tag is None: + has_tag = v2.has_active_tag(namespace_name, repo_name, manifest_ref) + if not has_tag: raise ManifestUnknown() - manifest = _generate_and_store_manifest(namespace_name, repo_name, tag_name) + manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref) if manifest is None: raise 
ManifestUnknown() @@ -52,9 +53,9 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name): metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) return Response( - manifest.bytes, + manifest.json, status=200, - headers={'Content-Type': manifest.content_type, 'Docker-Content-Digest': manifest.digest}, + headers={'Content-Type': manifest.media_type, 'Docker-Content-Digest': manifest.digest}, ) @@ -64,7 +65,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name): @require_repo_read @anon_protect def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) + manifest = v2.get_manifest_by_digest(namespace_name, repo_name, manifest_ref) if manifest is None: # Without a tag name to reference, we can't make an attempt to generate the manifest raise ManifestUnknown() @@ -74,7 +75,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): track_and_log('pull_repo', repo) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - return Response(manifest.json, status=200, headers={'Content-Type': manifest.content_type, + return Response(manifest.json, status=200, headers={'Content-Type': manifest.media_type, 'Docker-Content-Digest': manifest.digest}) @@ -94,13 +95,13 @@ def _reject_manifest2_schema2(func): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def write_manifest_by_tagname(namespace_name, repo_name, tag_name): +def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): try: manifest = DockerSchema1Manifest(request.data) except ManifestException as me: raise ManifestInvalid(detail={'message': me.message}) - if manifest.tag != tag_name: + if manifest.tag != manifest_ref: raise TagInvalid() return _write_manifest(namespace_name, repo_name, manifest) @@ -144,8 +145,7 @@ def _write_manifest(namespace_name, repo_name, manifest): raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) # Ensure all the blobs in the manifest exist. - storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums) - storage_map = {storage.content_checksum: storage for storage in storage_query} + storage_map = v2.lookup_blobs_by_digest(namespace_name, repo_name, manifest.checksums) for layer in manifest.layers: digest_str = str(layer.digest) if digest_str not in storage_map: @@ -153,14 +153,14 @@ def _write_manifest(namespace_name, repo_name, manifest): # Lookup all the images and their parent images (if any) inside the manifest. # This will let us know which v1 images we need to synthesize and which ones are invalid. - all_image_ids = list(manifest.docker_image_ids | manifest.parent_image_ids) + all_image_ids = list(manifest.parent_image_ids | manifest.image_ids) images_map = v2.get_docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) # Rewrite any v1 image IDs that do not match the checksum in the database. 
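 # rewrite_invalid_image_ids yields DockerV1Metadata results lazily, so it is
 # materialized with list() below; its last entry is later used as the leaf layer.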
try: - rewritten_images = manifest.rewrite_invalid_image_ids(images_map) + rewritten_images = list(manifest.rewrite_invalid_image_ids(images_map)) for rewritten_image in rewritten_images: - image = v2.synthesize_v1_image( + v1_metadata = v2.synthesize_v1_image( repo, storage_map[rewritten_image.content_checksum], rewritten_image.image_id, @@ -170,13 +170,13 @@ def _write_manifest(namespace_name, repo_name, manifest): rewritten_image.compat_json, rewritten_image.parent_image_id, ) - images_map[image.image_id] = image except ManifestException as me: raise ManifestInvalid(detail={'message': me.message}) # Store the manifest pointing to the tag. - leaf_layer_id = images_map[manifest.leaf_layer.v1_metadata.image_id].image_id - v2.save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest.digest, manifest.bytes) + leaf_layer_id = rewritten_images[-1].image_id + v2.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id, manifest.digest, + manifest.bytes) # Queue all blob manifests for replication. # TODO(jschorr): Find a way to optimize this insertion. @@ -206,25 +206,19 @@ def _write_manifest(namespace_name, repo_name, manifest): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def delete_manifest_by_digest(namespace_name, repo_name, digest): +def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): """ Delete the manifest specified by the digest. Note: there is no equivalent method for deleting by tag name because it is forbidden by the spec. """ - tag = v2.get_tag_by_manifest_digest(namespace_name, repo_name, digest) - if tag is None: - # TODO(jzelinskie): disambiguate between no manifest and no tag + tags = v2.delete_manifest_by_digest(namespace_name, repo_name, manifest_ref) + if not tags: raise ManifestUnknown() - # Mark the tag as no longer alive. - deleted = v2.delete_tag(namespace_name, repo_name, tag.name) - if not deleted: - # Tag was not alive. 
- raise ManifestUnknown() - - track_and_log('delete_tag', tag.repository, tag=tag.name, digest=digest) + for tag in tags: + track_and_log('delete_tag', tag.repository, tag=tag.name, digest=manifest_ref) return Response(status=202) diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py index 11253aee0..8fd3d32ab 100644 --- a/endpoints/v2/tag.py +++ b/endpoints/v2/tag.py @@ -5,6 +5,7 @@ from endpoints.common import parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, paginate from endpoints.v2.errors import NameUnknown from endpoints.decorators import anon_protect +from data.interfaces import v2 @v2_bp.route('//tags/list', methods=['GET']) @parse_repository_name() diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py index e5a617df8..2398bde99 100644 --- a/endpoints/v2/v2auth.py +++ b/endpoints/v2/v2auth.py @@ -11,6 +11,7 @@ from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermissi CreateRepositoryPermission) from endpoints.v2 import v2_bp from endpoints.decorators import anon_protect +from data.interfaces import v2 from util.cache import no_cache from util.names import parse_namespace_repository, REPOSITORY_NAME_REGEX from util.security.registry_jwt import generate_bearer_token, build_context_and_subject diff --git a/image/__init__.py b/image/__init__.py index 81485dd23..1ea1f42bb 100644 --- a/image/__init__.py +++ b/image/__init__.py @@ -1,6 +1,7 @@ import tarfile from collections import namedtuple +from namedlist import namedlist from util.registry.gzipwrap import GzipWrap @@ -10,6 +11,11 @@ class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])) ManifestJSON represents a Manifest of any format. """ +class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])): + """ + RepositoryReference represents a reference to a Repository, without its full metadata. + """ + class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', 'is_public'])): @@ -24,15 +30,16 @@ class Tag(namedtuple('Tag', ['name', 'repository'])): """ -class BlobUpload(namedtuple('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count', - 'chunk_count', 'sha_state', 'location_name', - 'storage_metadata', 'piece_sha_state', 'piece_hashes'])): +class BlobUpload(namedlist('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count', + 'chunk_count', 'sha_state', 'location_name', + 'storage_metadata', 'piece_sha_state', 'piece_hashes', + 'repo_namespace_name', 'repo_name'])): """ BlobUpload represents the current state of an Blob being uploaded. """ -class Blob(namedtuple('Blob', ['digest', 'size', 'locations'])): +class Blob(namedtuple('Blob', ['uuid', 'digest', 'size', 'locations'])): """ Blob represents an opaque binary blob saved to the storage system. 
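  A construction sketch (all field values here are hypothetical):

    blob = Blob(uuid='some-uuid', digest='sha256:abcd...', size=1024, locations=['local_us'])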
""" diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 14e77ad83..23c49d61b 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -121,6 +121,10 @@ class DockerSchema1Manifest(object): def content_type(self): return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE + @property + def media_type(self): + return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE + @property def signatures(self): return self._signatures @@ -137,6 +141,10 @@ class DockerSchema1Manifest(object): def tag(self): return self._tag + @property + def json(self): + return self._bytes + @property def bytes(self): return self._bytes @@ -216,11 +224,12 @@ class DockerSchema1Manifest(object): content, but the checksums don't match, then we need to rewrite the image ID to something new in order to ensure consistency. """ - # used to synthesize a new "content addressable" image id - digest_history = hashlib.sha256() + # Used to synthesize a new "content addressable" image id + digest_history = hashlib.sha256() has_rewritten_ids = False updated_id_map = {} + for layer in self.layers: digest_str = str(layer.digest) extracted_v1_metadata = layer.v1_metadata @@ -247,25 +256,33 @@ class DockerSchema1Manifest(object): # Lookup the parent image for the layer, if any. parent_image_id = None if extracted_v1_metadata.parent_image_id is not None: - parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None) - if parent_image_id is None: + parent_image = images_map.get(extracted_v1_metadata.parent_image_id, None) + if parent_image is None: raise MalformedSchema1Manifest('parent not found with image ID: %s' % extracted_v1_metadata.parent_image_id) + parent_image_id = updated_id_map.get(parent_image.image_id, parent_image.image_id) # Synthesize and store the v1 metadata in the db. v1_metadata_json = layer.raw_v1_metadata if has_rewritten_ids: v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map) - yield DockerV1Metadata( + updated_image = DockerV1Metadata( + namespace_name=self.namespace, + repo_name=self.repo_name, image_id=working_image_id, created=extracted_v1_metadata.created, comment=extracted_v1_metadata.comment, command=extracted_v1_metadata.command, compat_json=v1_metadata_json, parent_image_id=parent_image_id, + checksum=None, # TODO: Check if we need this. 
+ content_checksum=digest_str, ) + images_map[updated_image.image_id] = updated_image + yield updated_image + class DockerSchema1ManifestBuilder(object): """ diff --git a/requirements-nover.txt b/requirements-nover.txt index 01fce31d3..f1c093b26 100644 --- a/requirements-nover.txt +++ b/requirements-nover.txt @@ -1,65 +1,66 @@ -autobahn==0.9.3-3 -aiowsgi -trollius -flask -py-bcrypt -Flask-Principal -Flask-Login -Flask-Mail -python-dateutil -boto -pymysql==0.6.7 # Remove version when baseimage has Python 2.7.9+ -stripe -gunicorn<19.0 -gevent -mixpanel -beautifulsoup4 -marisa-trie -APScheduler==3.0.5 -xhtml2pdf -redis -hiredis -flask-restful==0.2.12 -jsonschema --e git+https://github.com/NateFerrero/oauth2lib.git#egg=oauth2lib -alembic -sqlalchemy -python-magic -reportlab==2.7 -raven -peewee -python-ldap -pycryptodome -psycopg2 -pyyaml -PyGithub -e git+https://github.com/DevTable/aniso8601-fake.git#egg=aniso8610 -e git+https://github.com/DevTable/anunidecode.git#egg=anunidecode -e git+https://github.com/DevTable/container-cloud-config.git#egg=container-cloud-config +-e git+https://github.com/DevTable/python-etcd.git@sslfix#egg=python-etcd +-e git+https://github.com/NateFerrero/oauth2lib.git#egg=oauth2lib +-e git+https://github.com/coreos/mockldap.git@v0.1.x#egg=mockldap -e git+https://github.com/coreos/py-bitbucket.git#egg=py-bitbucket -e git+https://github.com/coreos/pyapi-gitlab.git@timeout#egg=pyapi-gitlab --e git+https://github.com/coreos/mockldap.git@v0.1.x#egg=mockldap -e git+https://github.com/coreos/resumablehashlib.git#egg=resumablehashlib --e git+https://github.com/DevTable/python-etcd.git@sslfix#egg=python-etcd -gipc -pyOpenSSL -pygpgme -cachetools -mock -psutil -stringscore -python-swiftclient -python-keystoneclient +APScheduler==3.0.5 +Flask-Login +Flask-Mail +Flask-Principal Flask-Testing -pyjwt -toposort -pyjwkest -jsonpath-rw -bintrees -redlock -semantic-version +PyGithub +aiowsgi +alembic +autobahn==0.9.3-3 +beautifulsoup4 bencode +bintrees +boto +cachetools cryptography +flask +flask-restful==0.2.12 +gevent +gipc +gunicorn<19.0 +hiredis httmock +jsonpath-rw +jsonschema +marisa-trie +mixpanel +mock moto +namedlist +peewee +psutil +psycopg2 +py-bcrypt +pyOpenSSL +pycryptodome +pygpgme +pyjwkest +pyjwt +pymysql==0.6.7 # Remove version when baseimage has Python 2.7.9+ +python-dateutil +python-keystoneclient +python-ldap +python-magic +python-swiftclient +pyyaml +raven +redis +redlock +reportlab==2.7 +semantic-version +sqlalchemy +stringscore +stripe +toposort +trollius tzlocal +xhtml2pdf diff --git a/test/registry_tests.py b/test/registry_tests.py index 971476e13..d59a1637c 100644 --- a/test/registry_tests.py +++ b/test/registry_tests.py @@ -21,8 +21,9 @@ from data import model from endpoints.v1 import v1_bp from endpoints.v2 import v2_bp from endpoints.verbs import verbs -from endpoints.v2.manifest import SignedManifestBuilder from endpoints.api import api_bp +from image.docker.schema1 import DockerSchema1ManifestBuilder + from initdb import wipe_database, initialize_database, populate_database from endpoints.csrf import generate_csrf_token from tempfile import NamedTemporaryFile @@ -425,7 +426,6 @@ class V1RegistryPullMixin(V1RegistryMixin): # Ensure we do (or do not) have a matching image ID. tag_image_id = tags_result['latest'] known_ids = [item['id'] for item in images] - self.assertEquals(not munge_shas, tag_image_id in known_ids) # Retrieve the ancestry of the tag image. @@ -545,7 +545,7 @@ class V2RegistryPushMixin(V2RegistryMixin): # Build a fake manifest. 
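 # DockerSchema1ManifestBuilder (from image.docker.schema1) replaces the old
 # SignedManifestBuilder; the add_layer(...) / build(_JWK) call pattern is unchanged.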
tag_name = tag_name or 'latest' - builder = SignedManifestBuilder(namespace, repository, tag_name) + builder = DockerSchema1ManifestBuilder(namespace, repository, tag_name) full_contents = {} for image_data in reversed(images): @@ -1090,6 +1090,20 @@ class RegistryTestsMixin(object): class V1RegistryTests(V1RegistryPullMixin, V1RegistryPushMixin, RegistryTestsMixin, RegistryTestCaseMixin, LiveServerTestCase): """ Tests for V1 registry. """ + def test_users(self): + # Not logged in, should 404. + self.conduct('GET', '/v1/users', expected_code=404) + + # Try some logins. + self.conduct('POST', '/v1/users', json_data={'username': 'freshuser'}, expected_code=400) + resp = self.conduct('POST', '/v1/users', + json_data={'username': 'devtable', 'password': 'password'}, + expected_code=400) + + # Because Docker + self.assertEquals('"Username or email already exists"', resp.text) + + def test_push_reponame_with_slashes(self): # Attempt to add a repository name with slashes. This should fail as we do not support it. images = [{ @@ -1190,7 +1204,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix self.do_auth('devtable', 'password', namespace, repository, scopes=['push', 'pull']) # Build a fake manifest. - builder = SignedManifestBuilder(namespace, repository, tag_name) + builder = DockerSchema1ManifestBuilder(namespace, repository, tag_name) builder.add_layer('sha256:' + hashlib.sha256('invalid').hexdigest(), json.dumps({'id': 'foo'})) manifest = builder.build(_JWK) @@ -1210,7 +1224,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix self.do_auth('devtable', 'password', namespace, repository, scopes=['push', 'pull']) # Build a fake manifest. - builder = SignedManifestBuilder(namespace, repository, tag_name) + builder = DockerSchema1ManifestBuilder(namespace, repository, tag_name) builder.add_layer('sha256:' + hashlib.sha256('invalid').hexdigest(), json.dumps({'id': 'foo'})) manifest = builder.build(_JWK) From b775458d4b3f5f75b4387bdb907b55494d288049 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 29 Aug 2016 16:00:42 -0400 Subject: [PATCH 26/34] lifetimes on Tags should now be in milliseconds Fixes #1779 --- data/database.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/data/database.py b/data/database.py index 1f18b8d84..0e724c6b0 100644 --- a/data/database.py +++ b/data/database.py @@ -282,6 +282,7 @@ def uuid_generator(): get_epoch_timestamp = lambda: int(time.time()) +get_epoch_timestamp_ms = lambda: int(time.time() * 1000) def close_db_filter(_): @@ -1142,8 +1143,8 @@ class Tag(BaseModel): name = CharField() repository = ForeignKeyField(Repository) manifest_list = ForeignKeyField(ManifestList) - lifetime_start = IntegerField(default=get_epoch_timestamp) - lifetime_end = IntegerField(null=True, index=True) + lifetime_start = BigIntegerField(default=get_epoch_timestamp_ms) + lifetime_end = BigIntegerField(null=True, index=True) hidden = BooleanField(default=False) reverted = BooleanField(default=False) From c06d395f964e89cfe02656581f133e9a68699245 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 30 Aug 2016 15:05:15 -0400 Subject: [PATCH 27/34] create interfaces for v1 and v2 data model --- data/interfaces/v1.py | 620 ++++++++++++++++++++----------- data/interfaces/v2.py | 777 ++++++++++++++++++++++++--------------- endpoints/v1/index.py | 23 +- endpoints/v1/registry.py | 70 ++-- endpoints/v1/tag.py | 17 +- endpoints/v2/blob.py | 34 +- endpoints/v2/catalog.py | 4 +- 
endpoints/v2/manifest.py | 38 +- endpoints/v2/tag.py | 6 +- endpoints/v2/v2auth.py | 8 +- image/__init__.py | 111 ------ image/appc/__init__.py | 2 +- image/common.py | 68 ++++ image/docker/squashed.py | 2 +- 14 files changed, 1048 insertions(+), 732 deletions(-) create mode 100644 image/common.py diff --git a/data/interfaces/v1.py b/data/interfaces/v1.py index cbffaa3ef..214ffee2c 100644 --- a/data/interfaces/v1.py +++ b/data/interfaces/v1.py @@ -1,263 +1,435 @@ +from collections import namedtuple + from app import app, storage as store from data import model from data.model import db_transaction from util.morecollections import AttrDict -from data.interfaces.common import repository_for_repo -def placement_locations_docker_v1(namespace_name, repo_name, image_id): - """ Returns all the placements for the image with the given V1 Docker ID, found under the - given repository or None if no image was found. + +class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', + 'is_public'])): """ - repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) - if repo_image is None or repo_image.storage is None: - return None - - return repo_image.storage.locations - - -def placement_locations_and_path_docker_v1(namespace_name, repo_name, image_id): - """ Returns a tuple of the placements and storage path location for the image with the - given V1 Docker ID, found under the given repository or None if no image was found. + Repository represents a namespaced collection of tags. """ - repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) - if not repo_image or repo_image.storage is None: - return None, None - - return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage) -def docker_v1_metadata(namespace_name, repo_name, image_id): - """ Returns various pieces of metadata associated with an image with the given V1 Docker ID, - including the checksum and its V1 JSON metadata. +def _repository_for_repo(repo): """ - repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) - if repo_image is None: - return None - - return AttrDict({ - 'namespace_name': namespace_name, - 'repo_name': repo_name, - 'image_id': image_id, - 'checksum': repo_image.v1_checksum, - 'compat_json': repo_image.v1_json_metadata, - }) - - -def update_docker_v1_metadata(namespace_name, repo_name, image_id, created_date_str, comment, - command, compat_json, parent_image_id=None): - """ Updates various pieces of V1 metadata associated with a particular image. """ - parent_image = None - if parent_image_id is not None: - parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_image_id) - - model.image.set_image_metadata(image_id, namespace_name, repo_name, created_date_str, comment, - command, compat_json, parent=parent_image) - - -def storage_exists(namespace_name, repo_name, image_id): - """ Returns whether storage already exists for the image with the V1 Docker ID under the - given repository. + Returns a Repository object representing the repo data model instance given. 
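+    A usage sketch (repository names are hypothetical):
+
+      repo_tuple = _repository_for_repo(model.repository.get_repository('devtable', 'simple'))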
""" - repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) - if repo_image is None or repo_image.storage is None: - return False - - if repo_image.storage.uploading: - return False - - layer_path = model.storage.get_layer_path(repo_image.storage) - return store.exists(repo_image.storage.locations, layer_path) + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) -def store_docker_v1_checksums(namespace_name, repo_name, image_id, checksum, content_checksum): - """ Stores the various V1 checksums for the image with the V1 Docker ID. """ - repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) - if repo_image is None or repo_image.storage is None: - return +class DockerRegistryV1DataInterface(object): + """ + Interface that represents all data store interactions required by a Docker Registry v1. + """ - with db_transaction(): - repo_image.storage.content_checksum = content_checksum - repo_image.v1_checksum = checksum + @classmethod + def placement_locations_docker_v1(cls, namespace_name, repo_name, image_id): + """ + Returns all the placements for the image with the given V1 Docker ID, found under the given + repository or None if no image was found. + """ + raise NotImplementedError() + @classmethod + def placement_locations_and_path_docker_v1(cls, namespace_name, repo_name, image_id): + """ + Returns all the placements for the image with the given V1 Docker ID, found under the given + repository or None if no image was found. + """ + raise NotImplementedError() + + @classmethod + def docker_v1_metadata(cls, namespace_name, repo_name, image_id): + """ + Returns various pieces of metadata associated with an image with the given V1 Docker ID, + including the checksum and its V1 JSON metadata. + """ + raise NotImplementedError() + + @classmethod + def update_docker_v1_metadata(cls, namespace_name, repo_name, image_id, created_date_str, comment, + command, compat_json, parent_image_id=None): + """ + Updates various pieces of V1 metadata associated with a particular image. + """ + raise NotImplementedError() + + @classmethod + def storage_exists(cls, namespace_name, repo_name, image_id): + """ + Returns whether storage already exists for the image with the V1 Docker ID under the given + repository. + """ + raise NotImplementedError() + + @classmethod + def store_docker_v1_checksums(cls, namespace_name, repo_name, image_id, checksum, content_checksum): + """ + Stores the various V1 checksums for the image with the V1 Docker ID. + """ + raise NotImplementedError() + + @classmethod + def is_image_uploading(cls, namespace_name, repo_name, image_id): + """ + Returns whether the image with the V1 Docker ID is currently marked as uploading. + """ + raise NotImplementedError() + + @classmethod + def update_image_uploading(cls, namespace_name, repo_name, image_id, is_uploading): + """ Marks the image with the V1 Docker ID with the given uploading status. """ + raise NotImplementedError() + + @classmethod + def update_image_sizes(cls, namespace_name, repo_name, image_id, size, uncompressed_size): + """ + Updates the sizing information for the image with the given V1 Docker ID. + """ + raise NotImplementedError() + + @classmethod + def get_image_size(cls, namespace_name, repo_name, image_id): + """ + Returns the wire size of the image with the given Docker V1 ID. 
+ """ + raise NotImplementedError() + + @classmethod + def create_bittorrent_pieces(cls, namespace_name, repo_name, image_id, pieces_bytes): + """ + Saves the BitTorrent piece hashes for the image with the given Docker V1 ID. + """ + raise NotImplementedError() + + @classmethod + def image_ancestry(cls, namespace_name, repo_name, image_id): + """ + Returns a list containing the full ancestry of Docker V1 IDs, in order, for the image with the + given Docker V1 ID. + """ + raise NotImplementedError() + + @classmethod + def repository_exists(cls, namespace_name, repo_name): + """ + Returns whether the repository with the given name and namespace exists. + """ + raise NotImplementedError() + + @classmethod + def create_or_link_image(cls, username, namespace_name, repo_name, image_id, storage_location): + """ + Adds the given image to the given repository, by either linking to an existing image visible to + the user with the given username, or creating a new one if no existing image matches. + """ + raise NotImplementedError() + + @classmethod + def create_temp_hidden_tag(cls, namespace_name, repo_name, image_id, expiration): + """ + Creates a hidden tag under the matching namespace pointing to the image with the given V1 Docker + ID. + """ + raise NotImplementedError() + + @classmethod + def list_tags(cls, namespace_name, repo_name): + """ + Returns all the tags defined in the repository with the given namespace and name. + """ + raise NotImplementedError() + + @classmethod + def create_or_update_tag(cls, namespace_name, repo_name, image_id, tag_name): + """ + Creates or updates a tag under the matching repository to point to the image with the given + Docker V1 ID. + """ + raise NotImplementedError() + + @classmethod + def find_image_id_by_tag(cls, namespace_name, repo_name, tag_name): + """ + Returns the Docker V1 image ID for the HEAD image for the tag with the given name under the + matching repository, or None if none. + """ + raise NotImplementedError() + + @classmethod + def delete_tag(cls, namespace_name, repo_name, tag_name): + """ Deletes the given tag from the given repository. """ + raise NotImplementedError() + + @classmethod + def load_token(cls, token): + """ + Loads the data associated with the given (deprecated) access token, and, if + found returns True. + """ + raise NotImplementedError() + + @classmethod + def verify_robot(cls, username, token): + """ + Returns True if the given robot username and token match an existing robot + account. + """ + raise NotImplementedError() + + @classmethod + def change_user_password(cls, user, new_password): + """ + Changes the password associated with the given user. + """ + raise NotImplementedError() + + @classmethod + def get_repository(cls, namespace_name, repo_name): + """ + Returns the repository with the given name under the given namespace or None + if none. + """ + raise NotImplementedError() + + @classmethod + def create_repository(cls, namespace_name, repo_name, user=None): + """ + Creates a new repository under the given namespace with the given name, for + the given user. + """ + raise NotImplementedError() + + @classmethod + def repository_is_public(cls, namespace_name, repo_name): + """ + Returns whether the repository with the given name under the given namespace + is public. If no matching repository was found, returns False. + """ + raise NotImplementedError() + + @classmethod + def validate_oauth_token(cls, token): + """ Returns whether the given OAuth token validates. 
""" + raise NotImplementedError() + + @classmethod + def get_sorted_matching_repositories(cls, search_term, only_public, can_read, limit): + """ + Returns a sorted list of repositories matching the given search term. + can_read is a callback that will be invoked for each repository found, to + filter results to only those visible to the current user (if any). + """ + raise NotImplementedError() + + + +class PreOCIModel(DockerRegistryV1DataInterface): + """ + PreOCIModel implements the data model for the v1 Docker Registry protocol using a database schema + before it was changed to support the OCI specification. + """ + @classmethod + def placement_locations_docker_v1(cls, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return None + return repo_image.storage.locations + + @classmethod + def placement_locations_and_path_docker_v1(cls, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) + if not repo_image or repo_image.storage is None: + return None, None + return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage) + + @classmethod + def docker_v1_metadata(cls, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) + if repo_image is None: + return None + + return AttrDict({ + 'namespace_name': namespace_name, + 'repo_name': repo_name, + 'image_id': image_id, + 'checksum': repo_image.v1_checksum, + 'compat_json': repo_image.v1_json_metadata, + }) + + @classmethod + def update_docker_v1_metadata(cls, namespace_name, repo_name, image_id, created_date_str, comment, + command, compat_json, parent_image_id=None): + parent_image = None + if parent_image_id is not None: + parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_image_id) + + model.image.set_image_metadata(image_id, namespace_name, repo_name, created_date_str, comment, + command, compat_json, parent=parent_image) + + @classmethod + def storage_exists(cls, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return False + + if repo_image.storage.uploading: + return False + + layer_path = model.storage.get_layer_path(repo_image.storage) + return store.exists(repo_image.storage.locations, layer_path) + + @classmethod + def store_docker_v1_checksums(cls, namespace_name, repo_name, image_id, checksum, content_checksum): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + + with db_transaction(): + repo_image.storage.content_checksum = content_checksum + repo_image.v1_checksum = checksum + repo_image.storage.save() + repo_image.save() + + @classmethod + def is_image_uploading(cls, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return False + return repo_image.storage.uploading + + @classmethod + def update_image_uploading(cls, namespace_name, repo_name, image_id, is_uploading): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + + 
repo_image.storage.uploading = is_uploading repo_image.storage.save() - repo_image.save() + return repo_image.storage + @classmethod + def update_image_sizes(cls, namespace_name, repo_name, image_id, size, uncompressed_size): + model.storage.set_image_storage_metadata(image_id, namespace_name, repo_name, size, + uncompressed_size) -def is_image_uploading(namespace_name, repo_name, image_id): - """ Returns whether the image with the V1 Docker ID is currently marked as uploading. """ - repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) - if repo_image is None or repo_image.storage is None: - return False + @classmethod + def get_image_size(cls, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return None + return repo_image.storage.image_size - return repo_image.storage.uploading + @classmethod + def create_bittorrent_pieces(cls, namespace_name, repo_name, image_id, pieces_bytes): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + model.storage.save_torrent_info(repo_image.storage, app.config['BITTORRENT_PIECE_SIZE'], + pieces_bytes) -def update_image_uploading(namespace_name, repo_name, image_id, is_uploading): - """ Marks the image with the V1 Docker ID with the given uploading status. """ - repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) - if repo_image is None or repo_image.storage is None: - return + @classmethod + def image_ancestry(cls, namespace_name, repo_name, image_id): + try: + image = model.image.get_image_by_id(namespace_name, repo_name, image_id) + except model.InvalidImageException: + return None - repo_image.storage.uploading = is_uploading - repo_image.storage.save() - return repo_image.storage + parents = model.image.get_parent_images(namespace_name, repo_name, image) + ancestry_docker_ids = [image.docker_image_id] + ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) + return ancestry_docker_ids + @classmethod + def repository_exists(cls, namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + return repo is not None -def update_image_sizes(namespace_name, repo_name, image_id, size, uncompressed_size): - """ Updates the sizing information for the image with the given V1 Docker ID. """ - model.storage.set_image_storage_metadata(image_id, namespace_name, repo_name, size, - uncompressed_size) + @classmethod + def create_or_link_image(cls, username, namespace_name, repo_name, image_id, storage_location): + repo = model.repository.get_repository(namespace_name, repo_name) + model.image.find_create_or_link_image(image_id, repo, username, {}, storage_location) + @classmethod + def create_temp_hidden_tag(cls, namespace_name, repo_name, image_id, expiration): + repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) + if repo_image is None: + return -def get_image_size(namespace_name, repo_name, image_id): - """ Returns the wire size of the image with the given Docker V1 ID. 
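
One detail worth noting here: update_image_uploading returns the storage record it mutated, so
_finish_image can hand the blob reference straight to the replication queue without a second
database lookup. A sketch of the calling pattern, using the queue helper already imported in
endpoints/v1/registry.py (argument values are placeholders):

    blob_ref = PreOCIModel.update_image_uploading(namespace_name, repo_name, image_id, False)
    if blob_ref is not None:
      # Queue replication of the just-finished layer to the other storage regions.
      queue_storage_replication(namespace_name, blob_ref)
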
""" - repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) - if repo_image is None or repo_image.storage is None: - return None + repo = repo_image.repository + model.tag.create_temporary_hidden_tag(repo, repo_image, expiration) - return repo_image.storage.image_size + @classmethod + def list_tags(cls, namespace_name, repo_name): + return model.tag.list_repository_tags(namespace_name, repo_name) + @classmethod + def create_or_update_tag(cls, namespace_name, repo_name, image_id, tag_name): + model.tag.create_or_update_tag(namespace_name, repo_name, tag_name, image_id) -def create_bittorrent_pieces(namespace_name, repo_name, image_id, pieces_bytes): - """ Saves the bittorrent piece hashes for the image with the given Docker V1 ID. """ - repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) - if repo_image is None or repo_image.storage is None: - return + @classmethod + def find_image_id_by_tag(cls, namespace_name, repo_name, tag_name): + try: + tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name) + except model.DataModelException: + return None - model.storage.save_torrent_info(repo_image.storage, app.config['BITTORRENT_PIECE_SIZE'], - pieces_bytes) + return tag_image.docker_image_id + @classmethod + def delete_tag(cls, namespace_name, repo_name, tag_name): + model.tag.delete_tag(namespace_name, repo_name, tag_name) -def image_ancestry(namespace_name, repo_name, image_id): - """ Returns a list containing the full ancestry of Docker V1 IDs, in order, for the image with - the givne Docker V1 ID. - """ - try: - image = model.image.get_image_by_id(namespace_name, repo_name, image_id) - except model.InvalidImageException: - return None + @classmethod + def load_token(cls, token): + try: + model.token.load_token_data(token) + return True + except model.InvalidTokenException: + return False - parents = model.image.get_parent_images(namespace_name, repo_name, image) - ancestry_docker_ids = [image.docker_image_id] - ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) - return ancestry_docker_ids + @classmethod + def verify_robot(cls, username, token): + try: + return bool(model.user.verify_robot(username, token)) + except model.InvalidRobotException: + return False + @classmethod + def change_user_password(cls, user, new_password): + model.user.change_password(user, new_password) -def repository_exists(namespace_name, repo_name): - """ Returns whether the repository with the given name and namespace exists. """ - repo = model.repository.get_repository(namespace_name, repo_name) - return repo is not None + @classmethod + def get_repository(cls, namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + return _repository_for_repo(repo) + @classmethod + def create_repository(cls, namespace_name, repo_name, user=None): + model.repository.create_repository(namespace_name, repo_name, user) -def create_or_link_image(username, namespace_name, repo_name, image_id, storage_location): - """ Adds the given image to the given repository, by either linking to an existing image - visible to the user with the given username, or creating a new one if no existing image - matches. 
- """ - repo = model.repository.get_repository(namespace_name, repo_name) - model.image.find_create_or_link_image(image_id, repo, username, {}, storage_location) + @classmethod + def repository_is_public(cls, namespace_name, repo_name): + return model.repository.repository_is_public(namespace_name, repo_name) + @classmethod + def validate_oauth_token(cls, token): + return bool(model.oauth.validate_access_token(token)) -def create_temp_hidden_tag(namespace_name, repo_name, image_id, expiration): - """ Creates a hidden tag under the matching namespace pointing to the image with the given V1 - Docker ID. - """ - repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) - if repo_image is None: - return - - repo = repo_image.repository - model.tag.create_temporary_hidden_tag(repo, repo_image, expiration) - - -def list_tags(namespace_name, repo_name): - """ Returns all the tags defined in the repository with the given namespace and name. """ - return model.tag.list_repository_tags(namespace_name, repo_name) - - -def create_or_update_tag(namespace_name, repo_name, image_id, tag_name): - """ Creates or updates a tag under the matching repository to point to the image with the given - Docker V1 ID. - """ - model.tag.create_or_update_tag(namespace_name, repo_name, tag_name, image_id) - - -def find_image_id_by_tag(namespace_name, repo_name, tag_name): - """ Returns the Docker V1 image ID for the HEAD image for the tag with the given name under - the matching repository, or None if none. - """ - try: - tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name) - except model.DataModelException: - return None - - return tag_image.docker_image_id - - -def delete_tag(namespace_name, repo_name, tag_name): - """ Deletes the given tag from the given repository. """ - model.tag.delete_tag(namespace_name, repo_name, tag_name) - - -def load_token(token): - """ Loads the data associated with the given (deprecated) access token, and, if found - returns True. - """ - try: - model.token.load_token_data(token) - return True - except model.InvalidTokenException: - return False - - -def verify_robot(username, token): - """ Returns True if the given robot username and token match an existing robot - account. - """ - try: - return bool(model.user.verify_robot(username, token)) - except model.InvalidRobotException: - return False - - -def change_user_password(user, new_password): - """ Changes the password associated with the given user. """ - model.user.change_password(user, new_password) - - -def get_repository(namespace_name, repo_name): - """ Returns the repository with the given name under the given namespace or None if none. """ - repo = model.repository.get_repository(namespace_name, repo_name) - if repo is None: - return None - - return repository_for_repo(repo) - - -def create_repository(namespace_name, repo_name, user=None): - """ Creates a new repository under the given namespace with the given name, for the given user. - """ - model.repository.create_repository(namespace_name, repo_name, user) - - -def repository_is_public(namespace_name, repo_name): - """ Returns whether the repository with the given name under the given namespace is public. - If no matching repository was found, returns False. - """ - return model.repository.repository_is_public(namespace_name, repo_name) - - -def validate_oauth_token(token): - """ Returns whether the given OAuth token validates. 
""" - return bool(model.oauth.validate_access_token(token)) - - -def get_sorted_matching_repositories(search_term, only_public, can_read, limit): - """ Returns a sorted list of repositories matching the given search term. can_read is a callback - that will be invoked for each repository found, to filter results to only those visible to - the current user (if any). - """ - repos = model.repository.get_sorted_matching_repositories(search_term, only_public, can_read, - limit=limit) - - return [repository_for_repo(repo) for repo in repos] + @classmethod + def get_sorted_matching_repositories(cls, search_term, only_public, can_read, limit): + repos = model.repository.get_sorted_matching_repositories(search_term, only_public, can_read, + limit=limit) + return [_repository_for_repo(repo) for repo in repos] diff --git a/data/interfaces/v2.py b/data/interfaces/v2.py index ff3e151dd..5ef48798a 100644 --- a/data/interfaces/v2.py +++ b/data/interfaces/v2.py @@ -1,95 +1,74 @@ +from collections import namedtuple + +from namedlist import namedlist from peewee import IntegrityError from data import model, database from data.model import DataModelException -from image import Blob, BlobUpload, ManifestJSON, RepositoryReference, Tag from image.docker.v1 import DockerV1Metadata -from data.interfaces.common import repository_for_repo + _MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws" -def create_repository(namespace_name, repo_name, creating_user=None): - """ Creates a new repository under the specified namespace with the given name. The user supplied - is the user creating the repository, if any. + +class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])): """ - return model.repository.create_repository(namespace_name, repo_name, creating_user) - - -def repository_is_public(namespace_name, repo_name): - """ Returns true if the repository with the given name under the given namespace has public - visibility. + ManifestJSON represents a Manifest of any format. """ - return model.repository.repository_is_public(namespace_name, repo_name) - -def get_repository(namespace_name, repo_name): - """ Returns a repository tuple for the repository with the given name under the given namespace. - Returns None if no such repository was found. +class Tag(namedtuple('Tag', ['name', 'repository'])): """ - repo = model.repository.get_repository(namespace_name, repo_name) - if repo is None: - return None - - return repository_for_repo(repo) - - -def has_active_tag(namespace_name, repo_name, tag_name): - """ Returns whether there is an active tag for the tag with the given name under the matching - repository, if any, or None if none. + Tag represents a user-facing alias for referencing a set of Manifests. """ - try: - model.tag.get_active_tag(namespace_name, repo_name, tag_name) - return True - except database.RepositoryTag.DoesNotExist: - return False -def get_manifest_by_tag(namespace_name, repo_name, tag_name): - """ Returns the current manifest for the tag with the given name under the matching repository, - if any, or None if none. 
+class BlobUpload(namedlist('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count',
+                                          'chunk_count', 'sha_state', 'location_name',
+                                          'storage_metadata', 'piece_sha_state', 'piece_hashes',
+                                          'repo_namespace_name', 'repo_name'])):
   """
-  try:
-    manifest = model.tag.load_tag_manifest(namespace_name, repo_name, tag_name)
-    return ManifestJSON(digest=manifest.digest, json=manifest.json_data, media_type=_MEDIA_TYPE)
-  except model.InvalidManifestException:
-    return None
-
-
-def get_manifest_by_digest(namespace_name, repo_name, digest):
-  """ Returns the manifest matching the given digest under the matching repository, if any,
-  or None if none.
+  BlobUpload represents the current state of a Blob being uploaded.
   """
-  try:
-    manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, digest)
-    return ManifestJSON(digest=digest, json=manifest.json_data, media_type=_MEDIA_TYPE)
-  except model.InvalidManifestException:
-    return None
 
-def delete_manifest_by_digest(namespace_name, repo_name, digest):
-  """ Deletes the manifest with the associated digest (if any) and returns all removed tags
-  that pointed to that manifest. If the manifest was not found, returns an empty list.
+class Blob(namedtuple('Blob', ['uuid', 'digest', 'size', 'locations'])):
+  """
+  Blob represents an opaque binary blob saved to the storage system.
   """
-  tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, digest)
 
-  def _tag_view(tag):
-    return Tag(
-      name=tag.name,
-      repository=RepositoryReference(
-        id=tag.repository_id,
-        name=repo_name,
-        namespace_name=namespace_name,
-      )
-    )
 
-  return [_tag_view(tag) for tag in tags]
+class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])):
+  """
+  RepositoryReference represents a reference to a Repository, without its full metadata.
+  """
+
+
+class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description',
+                                           'is_public'])):
+  """
+  Repository represents a namespaced collection of tags.
+  """
+
+
+def _repository_for_repo(repo):
+  """
+  Returns a Repository object representing the repo data model instance given.
+  """
+  return Repository(
+    id=repo.id,
+    name=repo.name,
+    namespace_name=repo.namespace_user.username,
+    description=repo.description,
+    is_public=model.repository.is_repository_public(repo)
+  )
 
 
 def _docker_v1_metadata(namespace_name, repo_name, repo_image):
-  """ Returns a DockerV1Metadata object for the given image under the repository with the given
-  namespace and name. Note that the namespace and name are passed here as an optimization,
-  and are *not checked* against the image.
+  """
+  Returns a DockerV1Metadata object for the given image under the repository with the given
+  namespace and name. Note that the namespace and name are passed here as an optimization, and are
+  *not checked* against the image.
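
The split between namedtuple and namedlist above is deliberate: Blob, RepositoryReference, and
Repository are immutable value objects, while BlobUpload uses namedlist precisely because its
fields are rewritten chunk by chunk during an upload. A quick illustration (all field values are
made up):

    blob = Blob(uuid='some-uuid', digest='sha256:abcd', size=1024, locations=['local_us'])
    # blob.size = 0 would raise AttributeError: namedtuple fields are read-only.

    upload = BlobUpload(uuid='some-uuid', byte_count=0, uncompressed_byte_count=0,
                        chunk_count=0, sha_state=None, location_name='local_us',
                        storage_metadata={}, piece_sha_state=None, piece_hashes=None,
                        repo_namespace_name='devtable', repo_name='simple')
    upload.byte_count += 1024  # Fine: namedlist fields are assignable in place.
    upload.chunk_count += 1
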
""" return DockerV1Metadata( namespace_name=namespace_name, @@ -98,264 +77,474 @@ def _docker_v1_metadata(namespace_name, repo_name, repo_image): checksum=repo_image.v1_checksum, content_checksum=repo_image.storage.content_checksum, compat_json=repo_image.v1_json_metadata, - created=repo_image.created, comment=repo_image.comment, command=repo_image.command, - parent_image_id=None, # TODO: make sure this isn't needed anywhere, as it is expensive to lookup + # TODO: make sure this isn't needed anywhere, as it is expensive to lookup + parent_image_id=None, ) -def get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): - """ Returns the Docker V1 metadata associated with the tag with the given name under the - matching repository, if any. If none, returns None. +class DockerRegistryV2DataInterface(object): """ - try: - repo_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) - return _docker_v1_metadata(namespace_name, repo_name, repo_image) - except DataModelException: - return None - - - -def get_docker_v1_metadata_by_image_id(namespace_name, repo_name, docker_image_ids): - """ Returns a map of Docker V1 metadata for each given image ID, matched under the repository - with the given namespace and name. Returns an empty map if the matching repository was not - found. + Interface that represents all data store interactions required by a Docker Registry v1. """ - repo = model.repository.get_repository(namespace_name, repo_name) - if repo is None: - return {} - images_query = model.image.lookup_repository_images(repo, docker_image_ids) - return {image.docker_image_id: _docker_v1_metadata(namespace_name, repo_name, image) - for image in images_query} + @classmethod + def create_repository(cls, namespace_name, repo_name, creating_user=None): + """ + Creates a new repository under the specified namespace with the given name. The user supplied is + the user creating the repository, if any. + """ + raise NotImplementedError() + + @classmethod + def repository_is_public(cls, namespace_name, repo_name): + """ + Returns true if the repository with the given name under the given namespace has public + visibility. + """ + raise NotImplementedError() + + @classmethod + def get_repository(cls, namespace_name, repo_name): + """ + Returns a repository tuple for the repository with the given name under the given namespace. + Returns None if no such repository was found. + """ + raise NotImplementedError() + + @classmethod + def has_active_tag(cls, namespace_name, repo_name, tag_name): + """ + Returns whether there is an active tag for the tag with the given name under the matching + repository, if any, or none if none. + """ + raise NotImplementedError() + + @classmethod + def get_manifest_by_tag(cls, namespace_name, repo_name, tag_name): + """ + Returns the current manifest for the tag with the given name under the matching repository, if + any, or None if none. + """ + raise NotImplementedError() + + @classmethod + def get_manifest_by_digest(cls, namespace_name, repo_name, digest): + """ + Returns the manifest matching the given digest under the matching repository, if any, or None if + none. + """ + raise NotImplementedError() + + @classmethod + def delete_manifest_by_digest(cls, namespace_name, repo_name, digest): + """ + Deletes the manifest with the associated digest (if any) and returns all removed tags that + pointed to that manifest. If the manifest was not found, returns an empty list. 
+ """ + raise NotImplementedError() + + @classmethod + def get_docker_v1_metadata_by_tag(cls, namespace_name, repo_name, tag_name): + """ + Returns the Docker V1 metadata associated with the tag with the given name under the matching + repository, if any. If none, returns None. + """ + raise NotImplementedError() + + @classmethod + def get_docker_v1_metadata_by_image_id(cls, namespace_name, repo_name, docker_image_ids): + """ + Returns a map of Docker V1 metadata for each given image ID, matched under the repository with + the given namespace and name. Returns an empty map if the matching repository was not found. + """ + raise NotImplementedError() + + @classmethod + def get_parents_docker_v1_metadata(cls, namespace_name, repo_name, docker_image_id): + """ + Returns an ordered list containing the Docker V1 metadata for each parent of the image with the + given docker ID under the matching repository. Returns an empty list if the image was not found. + """ + raise NotImplementedError() + + @classmethod + def create_manifest_and_update_tag(cls, namespace_name, repo_name, tag_name, manifest_digest, + manifest_bytes): + """ + Creates a new manifest with the given digest and byte data, and assigns the tag with the given + name under the matching repository to it. + """ + raise NotImplementedError() + + @classmethod + def synthesize_v1_image(cls, repository, storage, image_id, created, comment, command, + compat_json, parent_image_id): + """ + Synthesizes a V1 image under the specified repository, pointing to the given storage and returns + the V1 metadata for the synthesized image. + """ + raise NotImplementedError() + + @classmethod + def save_manifest(cls, namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest, + manifest_bytes): + """ + Saves a manifest pointing to the given leaf image, with the given manifest, under the matching + repository as a tag with the given name. + """ + raise NotImplementedError() + + @classmethod + def repository_tags(cls, namespace_name, repo_name, limit, offset): + """ + Returns the active tags under the repository with the given name and namespace. + """ + raise NotImplementedError() + + @classmethod + def get_visible_repositories(cls, username, limit, offset): + """ + Returns the repositories visible to the user with the given username, if any. + """ + raise NotImplementedError() + + @classmethod + def create_blob_upload(cls, namespace_name, repo_name, upload_uuid, location_name, + storage_metadata): + """ + Creates a blob upload under the matching repository with the given UUID and metadata. + Returns whether the matching repository exists. + """ + raise NotImplementedError() + + @classmethod + def blob_upload_by_uuid(cls, namespace_name, repo_name, upload_uuid): + """ + Searches for a blob upload with the given UUID under the given repository and returns it or None + if none. + """ + raise NotImplementedError() + + @classmethod + def update_blob_upload(cls, blob_upload): + """ + Saves any changes to the blob upload object given to the backing data store. + Fields that can change: + - uncompressed_byte_count + - piece_hashes + - piece_sha_state + - storage_metadata + - byte_count + - chunk_count + - sha_state + """ + raise NotImplementedError() + + @classmethod + def delete_blob_upload(cls, namespace_name, repo_name, uuid): + """ + Deletes the blob upload with the given uuid under the matching repository. If none, does + nothing. 
+ """ + raise NotImplementedError() + + @classmethod + def create_blob_and_temp_tag(cls, namespace_name, repo_name, blob_digest, blob_upload, + expiration_sec): + """ + Creates a blob and links a temporary tag with the specified expiration to it under the matching + repository. + """ + raise NotImplementedError() + + @classmethod + def lookup_blobs_by_digest(cls, namespace_name, repo_name, digests): + """ + Returns all the blobs with matching digests found under the matching repository. If the + repository doesn't exist, returns {}. + """ + raise NotImplementedError() + + @classmethod + def get_blob_by_digest(cls, namespace_name, repo_name, digest): + """ + Returns the blob with the given digest under the matching repository or None if none. + """ + raise NotImplementedError() + + @classmethod + def save_bittorrent_pieces(cls, blob, piece_size, piece_bytes): + """ + Saves the BitTorrent piece hashes for the given blob. + """ + raise NotImplementedError() + + @classmethod + def get_blob_path(cls, blob): + """ + Once everything is moved over, this could be in util.registry and not even touch the database. + """ + raise NotImplementedError() -def get_parents_docker_v1_metadata(namespace_name, repo_name, docker_image_id): - """ Returns an ordered list containing the Docker V1 metadata for each parent of the image - with the given docker ID under the matching repository. Returns an empty list if the image - was not found. +class PreOCIModel(DockerRegistryV2DataInterface): """ - repo_image = model.image.get_repo_image(namespace_name, repo_name, docker_image_id) - if repo_image is None: - return [] - - parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) - return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents] - - -def create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest_digest, - manifest_bytes): - """ Creates a new manifest with the given digest and byte data, and assigns the tag with the - given name under the matching repository to it. + PreOCIModel implements the data model for the v2 Docker Registry protocol using a database schema + before it was changed to support the OCI specification. """ - try: - model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, - manifest_digest, manifest_bytes) - except IntegrityError: - # It's already there! - pass + @classmethod + def create_repository(cls, namespace_name, repo_name, creating_user=None): + return model.repository.create_repository(namespace_name, repo_name, creating_user) + @classmethod + def repository_is_public(cls, namespace_name, repo_name): + return model.repository.repository_is_public(namespace_name, repo_name) -def synthesize_v1_image(repository, storage, image_id, created, comment, command, compat_json, - parent_image_id): - """ Synthesizes a V1 image under the specified repository, pointing to the given storage - and returns the V1 metadata for the synthesized image. 
- """ - repo = model.repository.get_repository(repository.namespace_name, repository.name) - if repo is None: - raise DataModelException('Unknown repository: %s/%s' % (repository.namespace_name, - repository.name)) + @classmethod + def get_repository(cls, namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + return _repository_for_repo(repo) - parent_image = None - if parent_image_id is not None: - parent_image = model.image.get_image(repo, parent_image_id) - if parent_image is None: - raise DataModelException('Unknown parent image: %s' % parent_image_id) + @classmethod + def has_active_tag(cls, namespace_name, repo_name, tag_name): + try: + model.tag.get_active_tag(namespace_name, repo_name, tag_name) + return True + except database.RepositoryTag.DoesNotExist: + return False - storage_obj = model.storage.get_storage_by_uuid(storage.uuid) - if storage_obj is None: - raise DataModelException('Unknown storage: %s' % storage.uuid) + @classmethod + def get_manifest_by_tag(cls, namespace_name, repo_name, tag_name): + try: + manifest = model.tag.load_tag_manifest(namespace_name, repo_name, tag_name) + return ManifestJSON(digest=manifest.digest, json=manifest.json_data, media_type=_MEDIA_TYPE) + except model.InvalidManifestException: + return None - repo_image = model.image.synthesize_v1_image(repo, storage_obj, image_id, created, comment, - command, compat_json, parent_image) - return _docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image) + @classmethod + def get_manifest_by_digest(cls, namespace_name, repo_name, digest): + try: + manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, digest) + return ManifestJSON(digest=digest, json=manifest.json_data, media_type=_MEDIA_TYPE) + except model.InvalidManifestException: + return None - -def save_manifest(namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest, - manifest_bytes): - """ Saves a manifest pointing to the given leaf image, with the given manifest, under the matching - repository as a tag with the given name. - """ - model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_docker_id, - manifest_digest, manifest_bytes) - - -def repository_tags(namespace_name, repo_name, limit, offset): - """ Returns the active tags under the repository with the given name and namespace. 
""" - tags_query = model.tag.list_repository_tags(namespace_name, repo_name) - tags_query = tags_query.limit(limit).offset(offset) - - def _tag_view(tag): - return Tag( - name=tag.name, - repository=RepositoryReference( - id=tag.repository_id, - name=repo_name, - namespace_name=namespace_name, + @classmethod + def delete_manifest_by_digest(cls, namespace_name, repo_name, digest): + def _tag_view(tag): + return Tag( + name=tag.name, + repository=RepositoryReference( + id=tag.repository_id, + name=repo_name, + namespace_name=namespace_name, + ) ) + + tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, digest) + return [_tag_view(tag) for tag in tags] + + @classmethod + def get_docker_v1_metadata_by_tag(cls, namespace_name, repo_name, tag_name): + try: + repo_img = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) + return _docker_v1_metadata(namespace_name, repo_name, repo_img) + except DataModelException: + return None + + @classmethod + def get_docker_v1_metadata_by_image_id(cls, namespace_name, repo_name, docker_image_ids): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return {} + + images_query = model.image.lookup_repository_images(repo, docker_image_ids) + return {image.docker_image_id: _docker_v1_metadata(namespace_name, repo_name, image) + for image in images_query} + + @classmethod + def get_parents_docker_v1_metadata(cls, namespace_name, repo_name, docker_image_id): + repo_image = model.image.get_repo_image(namespace_name, repo_name, docker_image_id) + if repo_image is None: + return [] + + parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) + return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents] + + @classmethod + def create_manifest_and_update_tag(cls, namespace_name, repo_name, tag_name, manifest_digest, + manifest_bytes): + try: + model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, + manifest_digest, manifest_bytes) + except IntegrityError: + # It's already there! 
+ pass + + @classmethod + def synthesize_v1_image(cls, repository, storage, image_id, created, comment, command, + compat_json, parent_image_id): + repo = model.repository.get_repository(repository.namespace_name, repository.name) + if repo is None: + raise DataModelException('Unknown repository: %s/%s' % (repository.namespace_name, + repository.name)) + + parent_image = None + if parent_image_id is not None: + parent_image = model.image.get_image(repo, parent_image_id) + if parent_image is None: + raise DataModelException('Unknown parent image: %s' % parent_image_id) + + storage_obj = model.storage.get_storage_by_uuid(storage.uuid) + if storage_obj is None: + raise DataModelException('Unknown storage: %s' % storage.uuid) + + repo_image = model.image.synthesize_v1_image(repo, storage_obj, image_id, created, comment, + command, compat_json, parent_image) + return _docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image) + + @classmethod + def save_manifest(cls, namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest, + manifest_bytes): + model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_docker_id, + manifest_digest, manifest_bytes) + + @classmethod + def repository_tags(cls, namespace_name, repo_name, limit, offset): + def _tag_view(tag): + return Tag( + name=tag.name, + repository=RepositoryReference( + id=tag.repository_id, + name=repo_name, + namespace_name=namespace_name, + ) + ) + + tags_query = model.tag.list_repository_tags(namespace_name, repo_name) + tags_query = tags_query.limit(limit).offset(offset) + return [_tag_view(tag) for tag in tags_query] + + @classmethod + def get_visible_repositories(cls, username, limit, offset): + query = model.repository.get_visible_repositories(username, include_public=(username is None)) + query = query.limit(limit).offset(offset) + return [_repository_for_repo(repo) for repo in query] + + @classmethod + def create_blob_upload(cls, namespace_name, repo_name, upload_uuid, location_name, + storage_metadata): + try: + model.blob.initiate_upload(namespace_name, repo_name, upload_uuid, location_name, + storage_metadata) + return True + except database.Repository.DoesNotExist: + return False + + @classmethod + def blob_upload_by_uuid(cls, namespace_name, repo_name, upload_uuid): + try: + found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) + except model.InvalidBlobUpload: + return None + + return BlobUpload( + repo_namespace_name=namespace_name, + repo_name=repo_name, + uuid=upload_uuid, + byte_count=found.byte_count, + uncompressed_byte_count=found.uncompressed_byte_count, + chunk_count=found.chunk_count, + sha_state=found.sha_state, + piece_sha_state=found.piece_sha_state, + piece_hashes=found.piece_hashes, + location_name=found.location.name, + storage_metadata=found.storage_metadata, ) - return [_tag_view(tag) for tag in tags_query] + @classmethod + def update_blob_upload(cls, blob_upload): + # Lookup the blob upload object. 
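
repository_tags and get_visible_repositories both push paging into the query itself via
limit/offset, leaving the page arithmetic to the caller. A sketch of the intended use (repository
names and page numbering are illustrative):

    page_size = 50
    page = 0  # zero-based page index chosen by the view layer
    tags = PreOCIModel.repository_tags('devtable', 'simple',
                                       limit=page_size, offset=page * page_size)
    tag_names = [tag.name for tag in tags]
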
+ try: + blob_upload_record = model.blob.get_blob_upload(blob_upload.repo_namespace_name, + blob_upload.repo_name, blob_upload.uuid) + except model.InvalidBlobUpload: + return + blob_upload_record.uncompressed_byte_count = blob_upload.uncompressed_byte_count + blob_upload_record.piece_hashes = blob_upload.piece_hashes + blob_upload_record.piece_sha_state = blob_upload.piece_sha_state + blob_upload_record.storage_metadata = blob_upload.storage_metadata + blob_upload_record.byte_count = blob_upload.byte_count + blob_upload_record.chunk_count = blob_upload.chunk_count + blob_upload_record.sha_state = blob_upload.sha_state + blob_upload_record.save() -def get_visible_repositories(username, limit, offset): - """ Returns the repositories visible to the user with the given username, if any. """ - query = model.repository.get_visible_repositories(username, include_public=(username is None)) - query = query.limit(limit).offset(offset) - return [repository_for_repo(repo) for repo in query] + @classmethod + def delete_blob_upload(cls, namespace_name, repo_name, uuid): + try: + found = model.blob.get_blob_upload(namespace_name, repo_name, uuid) + found.delete_instance() + except model.InvalidBlobUpload: + return - -def create_blob_upload(namespace_name, repo_name, upload_uuid, location_name, storage_metadata): - """ Creates a blob upload under the matching repository with the given UUID and metadata. - Returns whether the matching repository exists. - """ - try: - model.blob.initiate_upload(namespace_name, repo_name, upload_uuid, location_name, - storage_metadata) - return True - except database.Repository.DoesNotExist: - return False - - -def blob_upload_by_uuid(namespace_name, repo_name, upload_uuid): - """ Searches for a blob upload with the given UUID under the given repository and returns it - or None if none. - """ - try: - found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) - except model.InvalidBlobUpload: - return None - - return BlobUpload( - repo_namespace_name=namespace_name, - repo_name=repo_name, - uuid=upload_uuid, - byte_count=found.byte_count, - uncompressed_byte_count=found.uncompressed_byte_count, - chunk_count=found.chunk_count, - sha_state=found.sha_state, - piece_sha_state=found.piece_sha_state, - piece_hashes=found.piece_hashes, - location_name=found.location.name, - storage_metadata=found.storage_metadata, - ) - - -def update_blob_upload(blob_upload): - """ Saves any changes to the blob upload object given to the backing data store. - - Fields that can change: - - uncompressed_byte_count - - piece_hashes - - piece_sha_state - - storage_metadata - - byte_count - - chunk_count - - sha_state - """ - # Lookup the blob upload object. - try: - blob_upload_record = model.blob.get_blob_upload(blob_upload.repo_namespace_name, - blob_upload.repo_name, blob_upload.uuid) - except model.InvalidBlobUpload: - return - - blob_upload_record.uncompressed_byte_count = blob_upload.uncompressed_byte_count - blob_upload_record.piece_hashes = blob_upload.piece_hashes - blob_upload_record.piece_sha_state = blob_upload.piece_sha_state - blob_upload_record.storage_metadata = blob_upload.storage_metadata - blob_upload_record.byte_count = blob_upload.byte_count - blob_upload_record.chunk_count = blob_upload.chunk_count - blob_upload_record.sha_state = blob_upload.sha_state - blob_upload_record.save() - - -def delete_blob_upload(namespace_name, repo_name, uuid): - """ Deletes the blob upload with the given uuid under the matching repository. If none, does - nothing. 
- """ - try: - found = model.blob.get_blob_upload(namespace_name, repo_name, uuid) - except model.InvalidBlobUpload: - return - - found.delete_instance() - - -def create_blob_and_temp_tag(namespace_name, repo_name, blob_digest, blob_upload, expiration_sec): - """ Crates a blob and links a temporary tag with the specified expiration to it under the - matching repository. - """ - location_obj = model.storage.get_image_location_for_name(blob_upload.location_name) - blob_record = model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, - blob_digest, - location_obj.id, - blob_upload.byte_count, - expiration_sec, - blob_upload.uncompressed_byte_count) - return Blob( - uuid=blob_record.uuid, - digest=blob_digest, - size=blob_upload.byte_count, - locations=[blob_upload.location_name], - ) - - -def lookup_blobs_by_digest(namespace_name, repo_name, digests): - """ Returns all the blobs with matching digests found under the matching repository. If the - repository doesn't exist, returns {}. - """ - repo = model.repository.get_repository(namespace_name, repo_name) - if repo is None: - return {} - - def _blob_view(blob_record): + @classmethod + def create_blob_and_temp_tag(cls, namespace_name, repo_name, blob_digest, blob_upload, + expiration_sec): + location_obj = model.storage.get_image_location_for_name(blob_upload.location_name) + blob_record = model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, + blob_digest, + location_obj.id, + blob_upload.byte_count, + expiration_sec, + blob_upload.uncompressed_byte_count) return Blob( uuid=blob_record.uuid, - digest=blob_record.content_checksum, - size=blob_record.image_size, - locations=None, # Note: Locations is None in this case. + digest=blob_digest, + size=blob_upload.byte_count, + locations=[blob_upload.location_name], ) - query = model.storage.lookup_repo_storages_by_content_checksum(repo, digests) - return {storage.content_checksum: _blob_view(storage) for storage in query} + @classmethod + def lookup_blobs_by_digest(cls, namespace_name, repo_name, digests): + def _blob_view(blob_record): + return Blob( + uuid=blob_record.uuid, + digest=blob_record.content_checksum, + size=blob_record.image_size, + locations=None, # Note: Locations is None in this case. + ) + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return {} + query = model.storage.lookup_repo_storages_by_content_checksum(repo, digests) + return {storage.content_checksum: _blob_view(storage) for storage in query} -def get_blob_by_digest(namespace_name, repo_name, digest): - """ Returns the blob with the given digest under the matching repository or None if none. 
""" - try: - blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) - return Blob( - uuid=blob_record.uuid, - digest=digest, - size=blob_record.image_size, - locations=blob_record.locations, - ) - except model.BlobDoesNotExist: - return None + @classmethod + def get_blob_by_digest(cls, namespace_name, repo_name, digest): + try: + blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) + return Blob( + uuid=blob_record.uuid, + digest=digest, + size=blob_record.image_size, + locations=blob_record.locations, + ) + except model.BlobDoesNotExist: + return None + @classmethod + def save_bittorrent_pieces(cls, blob, piece_size, piece_bytes): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + model.storage.save_torrent_info(blob_record, piece_size, piece_bytes) -def save_bittorrent_pieces(blob, piece_size, piece_bytes): - """ Saves the BitTorrent piece hashes for the given blob. """ - blob_record = model.storage.get_storage_by_uuid(blob.uuid) - model.storage.save_torrent_info(blob_record, piece_size, piece_bytes) - - -def get_blob_path(blob): - # Once everything is moved over, this could be in util.registry and not even - # touch the database. - blob_record = model.storage.get_storage_by_uuid(blob.uuid) - return model.storage.get_layer_path(blob_record) - + @classmethod + def get_blob_path(cls, blob): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + return model.storage.get_layer_path(blob_record) diff --git a/endpoints/v1/index.py b/endpoints/v1/index.py index 250731a11..c90ffc9d5 100644 --- a/endpoints/v1/index.py +++ b/endpoints/v1/index.py @@ -6,9 +6,8 @@ from functools import wraps from flask import request, make_response, jsonify, session -from data.interfaces import v1 +from data.interfaces.v1 import PreOCIModel as model from app import authentication, userevents, metric_queue -from app import authentication, userevents from auth.auth import process_auth, generate_signed_token from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token from auth.permissions import (ModifyRepositoryPermission, UserAdminPermission, @@ -86,17 +85,17 @@ def create_user(): success = make_response('"Username or email already exists"', 400) if username == '$token': - if v1.load_token(password): + if model.load_token(password): return success abort(400, 'Invalid access token.', issue='invalid-access-token') elif username == '$oauthtoken': - if v1.validate_oauth_token(password): + if model.validate_oauth_token(password): return success abort(400, 'Invalid oauth access token.', issue='invalid-oauth-access-token') elif '+' in username: - if v1.verify_robot(username, password): + if model.verify_robot(username, password): return success abort(400, 'Invalid robot account or password.', issue='robot-login-failure') @@ -147,7 +146,7 @@ def update_user(username): if 'password' in update_request: logger.debug('Updating user password') - v1.change_user_password(get_authenticated_user(), update_request['password']) + model.change_user_password(get_authenticated_user(), update_request['password']) return jsonify({ 'username': get_authenticated_user().username, @@ -167,7 +166,7 @@ def create_repository(namespace_name, repo_name): abort(400, message='Invalid repository name. 
Repository names cannot contain slashes.') logger.debug('Looking up repository %s/%s', namespace_name, repo_name) - repo = v1.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) logger.debug('Found repository %s/%s', namespace_name, repo_name) if not repo and get_authenticated_user() is None: @@ -195,7 +194,7 @@ def create_repository(namespace_name, repo_name): logger.debug('Creating repository %s/%s with owner: %s', namespace_name, repo_name, get_authenticated_user().username) - v1.create_repository(namespace_name, repo_name, get_authenticated_user()) + model.create_repository(namespace_name, repo_name, get_authenticated_user()) if get_authenticated_user(): user_event_data = { @@ -220,7 +219,7 @@ def update_images(namespace_name, repo_name): if permission.can(): logger.debug('Looking up repository') - repo = v1.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if not repo: # Make sure the repo actually exists. abort(404, message='Unknown repository', issue='unknown-repo') @@ -250,10 +249,10 @@ def get_repository_images(namespace_name, repo_name): permission = ReadRepositoryPermission(namespace_name, repo_name) # TODO invalidate token? - if permission.can() or v1.repository_is_public(namespace_name, repo_name): + if permission.can() or model.repository_is_public(namespace_name, repo_name): # We can't rely on permissions to tell us if a repo exists anymore logger.debug('Looking up repository') - repo = v1.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if not repo: abort(404, message='Unknown repository', issue='unknown-repo') @@ -319,7 +318,7 @@ def _conduct_repo_search(username, query, results): return ReadRepositoryPermission(repo.namespace_name, repo.name).can() only_public = username is None - matching_repos = v1.get_sorted_matching_repositories(query, only_public, can_read, limit=5) + matching_repos = model.get_sorted_matching_repositories(query, only_public, can_read, limit=5) for repo in matching_repos: results.append({ diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 1a4ca4fc3..1f169db0a 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -14,7 +14,7 @@ from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from auth.registry_jwt_auth import get_granted_username from data import model, database -from data.interfaces import v1 +from data.interfaces.v1 import PreOCIModel as model from digest import checksums from endpoints.v1 import v1_bp from endpoints.decorators import anon_protect @@ -30,7 +30,7 @@ logger = logging.getLogger(__name__) def _finish_image(namespace, repository, image_id): # Checksum is ok, we remove the marker - blob_ref = v1.update_image_uploading(namespace, repository, image_id, False) + blob_ref = model.update_image_uploading(namespace, repository, image_id, False) # Send a job to the work queue to replicate the image layer. 
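
A subtlety running through these endpoint diffs: registry.py keeps "from data import model,
database" and then immediately rebinds the name with "from data.interfaces.v1 import PreOCIModel
as model", so every later model.* call resolves to the interface class rather than the data
package. That is exactly why calls like model.repository.repository_is_public(...) become
model.repository_is_public(...) in the hunks below. In miniature:

    from data import model                                # model is the package
    from data.interfaces.v1 import PreOCIModel as model   # now shadows the package

    model.repository_is_public('devtable', 'simple')      # interface classmethod: fine
    # model.repository.repository_is_public(...)          # AttributeError after the rebind
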
queue_storage_replication(namespace, blob_ref) @@ -41,7 +41,7 @@ def require_completion(f): @wraps(f) def wrapper(namespace, repository, *args, **kwargs): image_id = kwargs['image_id'] - if v1.is_image_uploading(namespace, repository, image_id): + if model.is_image_uploading(namespace, repository, image_id): abort(400, 'Image %(image_id)s is being uploaded, retry later', issue='upload-in-progress', image_id=image_id) return f(namespace, repository, *args, **kwargs) @@ -82,9 +82,9 @@ def head_image_layer(namespace, repository, image_id, headers): permission = ReadRepositoryPermission(namespace, repository) logger.debug('Checking repo permissions') - if permission.can() or model.repository.repository_is_public(namespace, repository): + if permission.can() or model.repository_is_public(namespace, repository): logger.debug('Looking up placement locations') - locations = v1.placement_locations_docker_v1(namespace, repository, image_id) + locations = model.placement_locations_docker_v1(namespace, repository, image_id) if locations is None: logger.debug('Could not find any blob placement locations') abort(404, 'Image %(image_id)s not found', issue='unknown-image', @@ -115,11 +115,9 @@ def get_image_layer(namespace, repository, image_id, headers): permission = ReadRepositoryPermission(namespace, repository) logger.debug('Checking repo permissions') - if permission.can() or model.repository.repository_is_public(namespace, repository): + if permission.can() or model.repository_is_public(namespace, repository): logger.debug('Looking up placement locations and path') - locations, path = v1.placement_locations_and_path_docker_v1(namespace, - repository, - image_id) + locations, path = model.placement_locations_and_path_docker_v1(namespace, repository, image_id) if not locations or not path: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) @@ -154,7 +152,7 @@ def put_image_layer(namespace, repository, image_id): abort(403) logger.debug('Retrieving image') - if v1.storage_exists(namespace, repository, image_id): + if model.storage_exists(namespace, repository, image_id): exact_abort(409, 'Image already exists') logger.debug('Storing layer data') @@ -184,7 +182,7 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(piece_hasher.update) # Add a handler which computes the checksum. - v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) h, sum_hndlr = checksums.simple_checksum_handler(v1_metadata.compat_json) sr.add_handler(sum_hndlr) @@ -193,7 +191,7 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(content_sum_hndlr) # Stream write the data to storage. - locations, path = v1.placement_locations_and_path_docker_v1(namespace, repository, image_id) + locations, path = model.placement_locations_and_path_docker_v1(namespace, repository, image_id) with database.CloseForLongOperation(app.config): try: store.stream_write(locations, path, sr) @@ -202,11 +200,11 @@ def put_image_layer(namespace, repository, image_id): abort(520, 'Image %(image_id)s could not be written. Please try again.', image_id=image_id) # Save the size of the image. - v1.update_image_sizes(namespace, repository, image_id, size_info.compressed_size, - size_info.uncompressed_size) + model.update_image_sizes(namespace, repository, image_id, size_info.compressed_size, + size_info.uncompressed_size) # Save the BitTorrent pieces. 
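
put_image_layer fans a single pass over the upload stream out to several consumers by registering
handlers on the SocketReader: every registered callable sees each chunk exactly once while
stream_write pushes the bytes to storage. Reduced to its core, with handler names as in the hunk
above and size_hndlr assumed to come from gzipstream.calculate_size_handler():

    sr = SocketReader(input_stream)
    sr.add_handler(size_hndlr)               # tracks compressed/uncompressed sizes
    sr.add_handler(piece_hasher.update)      # accumulates BitTorrent piece hashes
    sr.add_handler(sum_hndlr)                # v1 checksum over metadata + layer
    sr.add_handler(content_sum_hndlr)        # content checksum over layer bytes only
    store.stream_write(locations, path, sr)  # one read drives all handlers
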
- v1.create_bittorrent_pieces(namespace, repository, image_id, piece_hasher.final_piece_hashes()) + model.create_bittorrent_pieces(namespace, repository, image_id, piece_hasher.final_piece_hashes()) # Append the computed checksum. csums = [] @@ -271,7 +269,7 @@ def put_image_checksum(namespace, repository, image_id): issue='missing-checksum-cookie', image_id=image_id) logger.debug('Looking up repo image') - v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) if not v1_metadata: abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) @@ -280,7 +278,7 @@ def put_image_checksum(namespace, repository, image_id): abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Marking image path') - if not v1.is_image_uploading(namespace, repository, image_id): + if not model.is_image_uploading(namespace, repository, image_id): abort(409, 'Cannot set checksum for image %(image_id)s', issue='image-write-error', image_id=image_id) @@ -291,7 +289,7 @@ def put_image_checksum(namespace, repository, image_id): if len(checksum_parts) != 2: abort(400, 'Invalid checksum format') - v1.store_docker_v1_checksums(namespace, repository, image_id, checksum, content_checksum) + model.store_docker_v1_checksums(namespace, repository, image_id, checksum, content_checksum) if checksum not in session.get('checksum', []): logger.debug('session checksums: %s', session.get('checksum', [])) @@ -315,16 +313,16 @@ def put_image_checksum(namespace, repository, image_id): def get_image_json(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') permission = ReadRepositoryPermission(namespace, repository) - if not permission.can() and not model.repository.repository_is_public(namespace, repository): + if not permission.can() and not model.repository_is_public(namespace, repository): abort(403) logger.debug('Looking up repo image') - v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) if v1_metadata is None: flask_abort(404) logger.debug('Looking up repo layer size') - size = v1.get_image_size(namespace, repository, image_id) + size = model.get_image_size(namespace, repository, image_id) if size is not None: # Note: X-Docker-Size is optional and we *can* end up with a NULL image_size, # so handle this case rather than failing. 
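
The X-Docker-Size note above is the whole contract: image_size can legitimately be NULL for rows
created before sizes were tracked, so the header is set only when a value exists rather than
failing the request. In brief:

    size = model.get_image_size(namespace, repository, image_id)
    if size is not None:
      headers['X-Docker-Size'] = str(size)  # omitted entirely when unknown
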
@@ -344,10 +342,10 @@ def get_image_json(namespace, repository, image_id, headers): def get_image_ancestry(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') permission = ReadRepositoryPermission(namespace, repository) - if not permission.can() and not model.repository.repository_is_public(namespace, repository): + if not permission.can() and not model.repository_is_public(namespace, repository): abort(403) - ancestry_docker_ids = v1.image_ancestry(namespace, repository, image_id) + ancestry_docker_ids = model.image_ancestry(namespace, repository, image_id) if ancestry_docker_ids is None: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) @@ -388,37 +386,39 @@ def put_image_json(namespace, repository, image_id): logger.debug('Looking up repo image') - if not v1.repository_exists(namespace, repository): + if not model.repository_exists(namespace, repository): abort(404, 'Repository does not exist: %(namespace)s/%(repository)s', issue='no-repo', namespace=namespace, repository=repository) - v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) if v1_metadata is None: username = get_authenticated_user() and get_authenticated_user().username if not username: username = get_granted_username() - logger.debug('Image not found, creating or linking image with initiating user context: %s', username) - v1.create_or_link_image(username, namespace, repository, image_id, store.preferred_locations[0]) - v1_metadata = v1.docker_v1_metadata(namespace, repository, image_id) + logger.debug('Image not found, creating or linking image with initiating user context: %s', + username) + location_pref = store.preferred_locations[0] + model.create_or_link_image(username, namespace, repository, image_id, location_pref) + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) # Create a temporary tag to prevent this image from getting garbage collected while the push # is in progress. 
- v1.create_temp_hidden_tag(namespace, repository, image_id, - app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) + model.create_temp_hidden_tag(namespace, repository, image_id, + app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) parent_id = data.get('parent', None) if parent_id: logger.debug('Looking up parent image') - if v1.docker_v1_metadata(namespace, repository, parent_id) is None: + if model.docker_v1_metadata(namespace, repository, parent_id) is None: abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', issue='invalid-request', image_id=image_id, parent_id=parent_id) logger.debug('Checking if image already exists') - if v1_metadata and not v1.is_image_uploading(namespace, repository, image_id): + if v1_metadata and not model.is_image_uploading(namespace, repository, image_id): exact_abort(409, 'Image already exists') - v1.update_image_uploading(namespace, repository, image_id, True) + model.update_image_uploading(namespace, repository, image_id, True) # If we reach that point, it means that this is a new image or a retry # on a failed push, save the metadata @@ -426,7 +426,7 @@ def put_image_json(namespace, repository, image_id): command = json.dumps(command_list) if command_list else None logger.debug('Setting image metadata') - v1.update_docker_v1_metadata(namespace, repository, image_id, data.get('created'), - data.get('comment'), command, uploaded_metadata, parent_id) + model.update_docker_v1_metadata(namespace, repository, image_id, data.get('created'), + data.get('comment'), command, uploaded_metadata, parent_id) return make_response('true', 200) diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index 822e55cb1..07902309d 100644 --- a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -9,7 +9,7 @@ from auth.auth import process_auth from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from data import model -from data.interfaces import v1 +from data.interfaces.v1 import PreOCIModel as model from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v1 import v1_bp @@ -26,8 +26,8 @@ logger = logging.getLogger(__name__) def get_tags(namespace_name, repo_name): permission = ReadRepositoryPermission(namespace_name, repo_name) - if permission.can() or model.repository.repository_is_public(namespace_name, repo_name): - tags = v1.list_tags(namespace_name, repo_name) + if permission.can() or model.repository_is_public(namespace_name, repo_name): + tags = model.list_tags(namespace_name, repo_name) tag_map = {tag.name: tag.image.docker_image_id for tag in tags} return jsonify(tag_map) @@ -41,8 +41,8 @@ def get_tags(namespace_name, repo_name): def get_tag(namespace_name, repo_name, tag): permission = ReadRepositoryPermission(namespace_name, repo_name) - if permission.can() or model.repository.repository_is_public(namespace_name, repo_name): - image_id = v1.find_image_id_by_tag(namespace_name, repo_name, tag) + if permission.can() or model.repository_is_public(namespace_name, repo_name): + image_id = model.find_image_id_by_tag(namespace_name, repo_name, tag) if image_id is None: abort(404) @@ -65,7 +65,7 @@ def put_tag(namespace_name, repo_name, tag): abort(400, TAG_ERROR) image_id = json.loads(request.data) - v1.create_or_update_tag(namespace_name, repo_name, image_id, tag) + model.create_or_update_tag(namespace_name, repo_name, image_id, tag) # Store the updated tag. 
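The tag endpoints above show the mechanical pattern this series applies everywhere: the concrete data model is imported under the neutral name model, so call sites depend only on the interface's method names. A minimal sketch of the intent; NewOCIModel is a hypothetical future implementation, not something in this patch:

# Today:
from data.interfaces.v1 import PreOCIModel as model

# Later, ideally the only line that changes:
# from data.interfaces.v1 import NewOCIModel as model

def example_view(namespace_name, repo_name):
  # Written against the interface, unaware of the backing schema.
  return model.list_tags(namespace_name, repo_name)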
if 'pushed_tags' not in session: @@ -86,9 +86,8 @@ def delete_tag(namespace_name, repo_name, tag): permission = ModifyRepositoryPermission(namespace_name, repo_name) if permission.can(): - v1.delete_tag(namespace_name, repo_name, tag) - track_and_log('delete_tag', model.repository.get_repository(namespace_name, repo_name), - tag=tag) + model.delete_tag(namespace_name, repo_name, tag) + track_and_log('delete_tag', model.get_repository(namespace_name, repo_name), tag=tag) return make_response('Deleted', 200) abort(403) diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index 504a7d83e..1e1a2bd30 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -8,7 +8,7 @@ import resumablehashlib from app import storage, app from auth.registry_jwt_auth import process_registry_jwt_auth from data import database -from data.interfaces import v2 +from data.interfaces.v2 import PreOCIModel as model from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream @@ -42,7 +42,7 @@ class _InvalidRangeHeader(Exception): @cache_control(max_age=31436000) def check_blob_exists(namespace_name, repo_name, digest): # Find the blob. - blob = v2.get_blob_by_digest(namespace_name, repo_name, digest) + blob = model.get_blob_by_digest(namespace_name, repo_name, digest) if blob is None: raise BlobUnknown() @@ -69,7 +69,7 @@ def check_blob_exists(namespace_name, repo_name, digest): @cache_control(max_age=31536000) def download_blob(namespace_name, repo_name, digest): # Find the blob. - blob = v2.get_blob_by_digest(namespace_name, repo_name, digest) + blob = model.get_blob_by_digest(namespace_name, repo_name, digest) if blob is None: raise BlobUnknown() @@ -81,7 +81,7 @@ def download_blob(namespace_name, repo_name, digest): headers['Accept-Ranges'] = 'bytes' # Find the storage path for the blob. - path = v2.get_blob_path(blob) + path = model.get_blob_path(blob) # Short-circuit by redirecting if the storage supports it. logger.debug('Looking up the direct download URL for path: %s', path) @@ -115,8 +115,8 @@ def start_blob_upload(namespace_name, repo_name): # Begin the blob upload process in the database and storage. location_name = storage.preferred_locations[0] new_upload_uuid, upload_metadata = storage.initiate_chunked_upload(location_name) - repository_exists = v2.create_blob_upload(namespace_name, repo_name, new_upload_uuid, - location_name, upload_metadata) + repository_exists = model.create_blob_upload(namespace_name, repo_name, new_upload_uuid, + location_name, upload_metadata) if not repository_exists: raise NameUnknown() @@ -135,7 +135,7 @@ def start_blob_upload(namespace_name, repo_name): # The user plans to send us the entire body right now. # Find the upload. - blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, new_upload_uuid) + blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, new_upload_uuid) if blob_upload is None: raise BlobUploadUnknown() @@ -146,7 +146,7 @@ def start_blob_upload(namespace_name, repo_name): _abort_range_not_satisfiable(blob_upload.byte_count, new_upload_uuid) # Save the upload state to the database. - v2.update_blob_upload(updated_blob_upload) + model.update_blob_upload(updated_blob_upload) # Finalize the upload process in the database and storage. 
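For orientation, the blob endpoints in this file implement the Docker registry v2 upload protocol. A rough client-side sketch of the happy path using the requests library; the host, repository path, and blob bytes are illustrative only, and a real client follows the Location header returned by each response rather than reusing the first one:

import hashlib
import requests

blob = b'example layer bytes'
digest = 'sha256:' + hashlib.sha256(blob).hexdigest()

base = 'http://localhost:5000/v2/devtable/simple/blobs'
# 1. Open an upload session; the registry answers with its location.
location = requests.post(base + '/uploads/').headers['Location']
# 2. Stream the bytes (one PATCH here; large blobs may send many chunks).
requests.patch(location, data=blob)
# 3. Commit the upload by presenting the digest the registry must verify.
requests.put(location, params={'digest': digest})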
_finish_upload(namespace_name, repo_name, updated_blob_upload, digest) @@ -168,7 +168,7 @@ def start_blob_upload(namespace_name, repo_name): @require_repo_write @anon_protect def fetch_existing_upload(namespace_name, repo_name, upload_uuid): - blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) + blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) if blob_upload is None: raise BlobUploadUnknown() @@ -188,7 +188,7 @@ def fetch_existing_upload(namespace_name, repo_name, upload_uuid): @anon_protect def upload_chunk(namespace_name, repo_name, upload_uuid): # Find the upload. - blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) + blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) if blob_upload is None: raise BlobUploadUnknown() @@ -199,7 +199,7 @@ def upload_chunk(namespace_name, repo_name, upload_uuid): _abort_range_not_satisfiable(blob_upload.byte_count, upload_uuid) # Save the upload state to the database. - v2.update_blob_upload(updated_blob_upload) + model.update_blob_upload(updated_blob_upload) # Write the response to the client. return Response( @@ -224,7 +224,7 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): raise BlobUploadInvalid(detail={'reason': 'Missing digest arg on monolithic upload'}) # Find the upload. - blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) + blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) if blob_upload is None: raise BlobUploadUnknown() @@ -254,13 +254,13 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): @require_repo_write @anon_protect def cancel_upload(namespace_name, repo_name, upload_uuid): - blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) + blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid) if blob_upload is None: raise BlobUploadUnknown() # We delete the record for the upload first, since if the partial upload in # storage fails to delete, it doesn't break anything. - v2.delete_blob_upload(namespace_name, repo_name, upload_uuid) + model.delete_blob_upload(namespace_name, repo_name, upload_uuid) storage.cancel_chunked_upload({blob_upload.location_name}, blob_upload.uuid, blob_upload.storage_metadata) @@ -471,7 +471,7 @@ def _finalize_blob_database(namespace_name, repo_name, blob_upload, digest, alre database's perspective. """ # Create the blob and temporarily tag it. - blob_storage = v2.create_blob_and_temp_tag( + blob_storage = model.create_blob_and_temp_tag( namespace_name, repo_name, digest, @@ -482,10 +482,10 @@ def _finalize_blob_database(namespace_name, repo_name, blob_upload, digest, alre # If it doesn't already exist, create the BitTorrent pieces for the blob. if blob_upload.piece_sha_state is not None and not already_existed: piece_bytes = blob_upload.piece_hashes + blob_upload.piece_sha_state.digest() - v2.save_bittorrent_pieces(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes) + model.save_bittorrent_pieces(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes) # Delete the blob upload. 
- v2.delete_blob_upload(namespace_name, repo_name, blob_upload.uuid) + model.delete_blob_upload(namespace_name, repo_name, blob_upload.uuid) def _finish_upload(namespace_name, repo_name, blob_upload, digest): diff --git a/endpoints/v2/catalog.py b/endpoints/v2/catalog.py index 9586c7909..4a145fe3f 100644 --- a/endpoints/v2/catalog.py +++ b/endpoints/v2/catalog.py @@ -3,7 +3,7 @@ from flask import jsonify from auth.registry_jwt_auth import process_registry_jwt_auth, get_granted_entity from endpoints.decorators import anon_protect from endpoints.v2 import v2_bp, paginate -from data.interfaces import v2 +from data.interfaces.v2 import PreOCIModel as model @v2_bp.route('/_catalog', methods=['GET']) @process_registry_jwt_auth() @@ -15,7 +15,7 @@ def catalog_search(limit, offset, pagination_callback): if entity: username = entity.user.username - visible_repositories = v2.get_visible_repositories(username, limit+1, offset) + visible_repositories = model.get_visible_repositories(username, limit+1, offset) response = jsonify({ 'repositories': ['%s/%s' % (repo.namespace_name, repo.name) for repo in visible_repositories][0:limit], diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 0fdd5cb51..9fdbe6ed1 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -8,8 +8,7 @@ import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth -from data import model -from data.interfaces import v2 +from data.interfaces.v2 import PreOCIModel as model from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect @@ -24,6 +23,7 @@ from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES from util.registry.replication import queue_storage_replication from util.names import VALID_TAG_PATTERN + logger = logging.getLogger(__name__) @@ -37,9 +37,9 @@ MANIFEST_TAGNAME_ROUTE = BASE_MANIFEST_ROUTE.format(VALID_TAG_PATTERN) @require_repo_read @anon_protect def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): - manifest = v2.get_manifest_by_tag(namespace_name, repo_name, manifest_ref) + manifest = model.get_manifest_by_tag(namespace_name, repo_name, manifest_ref) if manifest is None: - has_tag = v2.has_active_tag(namespace_name, repo_name, manifest_ref) + has_tag = model.has_active_tag(namespace_name, repo_name, manifest_ref) if not has_tag: raise ManifestUnknown() @@ -47,7 +47,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): if manifest is None: raise ManifestUnknown() - repo = v2.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', repo, analytics_name='pull_repo_100x', analytics_sample=0.01) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) @@ -65,12 +65,12 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @require_repo_read @anon_protect def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): - manifest = v2.get_manifest_by_digest(namespace_name, repo_name, manifest_ref) + manifest = model.get_manifest_by_digest(namespace_name, repo_name, manifest_ref) if manifest is None: # Without a tag name to reference, we can't make an attempt to generate the manifest raise ManifestUnknown() - repo = v2.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', 
repo) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) @@ -137,7 +137,7 @@ def _write_manifest(namespace_name, repo_name, manifest): raise NameInvalid() # Ensure that the repository exists. - repo = v2.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if repo is None: raise NameInvalid() @@ -145,7 +145,7 @@ def _write_manifest(namespace_name, repo_name, manifest): raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) # Ensure all the blobs in the manifest exist. - storage_map = v2.lookup_blobs_by_digest(namespace_name, repo_name, manifest.checksums) + storage_map = model.lookup_blobs_by_digest(namespace_name, repo_name, manifest.checksums) for layer in manifest.layers: digest_str = str(layer.digest) if digest_str not in storage_map: @@ -154,13 +154,13 @@ def _write_manifest(namespace_name, repo_name, manifest): # Lookup all the images and their parent images (if any) inside the manifest. # This will let us know which v1 images we need to synthesize and which ones are invalid. all_image_ids = list(manifest.parent_image_ids | manifest.image_ids) - images_map = v2.get_docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) + images_map = model.get_docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) # Rewrite any v1 image IDs that do not match the checksum in the database. try: rewritten_images = list(manifest.rewrite_invalid_image_ids(images_map)) for rewritten_image in rewritten_images: - v1_metadata = v2.synthesize_v1_image( + model.synthesize_v1_image( repo, storage_map[rewritten_image.content_checksum], rewritten_image.image_id, @@ -175,8 +175,8 @@ def _write_manifest(namespace_name, repo_name, manifest): # Store the manifest pointing to the tag. leaf_layer_id = rewritten_images[-1].image_id - v2.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id, manifest.digest, - manifest.bytes) + model.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id, manifest.digest, + manifest.bytes) # Queue all blob manifests for replication. # TODO(jschorr): Find a way to optimize this insertion. @@ -213,7 +213,7 @@ def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): Note: there is no equivalent method for deleting by tag name because it is forbidden by the spec. """ - tags = v2.delete_manifest_by_digest(namespace_name, repo_name, manifest_ref) + tags = model.delete_manifest_by_digest(namespace_name, repo_name, manifest_ref) if not tags: raise ManifestUnknown() @@ -225,9 +225,9 @@ def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): def _generate_and_store_manifest(namespace_name, repo_name, tag_name): # Find the v1 metadata for this image and its parents. - v1_metadata = v2.get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) - parents_v1_metadata = v2.get_parents_docker_v1_metadata(namespace_name, repo_name, - v1_metadata.image_id) + v1_metadata = model.get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) + parents_v1_metadata = model.get_parents_docker_v1_metadata(namespace_name, repo_name, + v1_metadata.image_id) # If the manifest is being generated under the library namespace, then we make its namespace # empty. @@ -248,6 +248,6 @@ def _generate_and_store_manifest(namespace_name, repo_name, tag_name): manifest = builder.build(docker_v2_signing_key) # Write the manifest to the DB. 
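This write closes the loop with the read path shown earlier: when a tag was pushed via the v1 protocol, no manifest row exists yet, so fetch_manifest_by_tagname falls back to synthesizing one. Condensed, with aborts elided:

manifest = model.get_manifest_by_tag(namespace_name, repo_name, tag_name)
if manifest is None:
  if not model.has_active_tag(namespace_name, repo_name, tag_name):
    raise ManifestUnknown()
  # The tag exists but predates v2: build and persist a schema1 manifest.
  manifest = _generate_and_store_manifest(namespace_name, repo_name, tag_name)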
- v2.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest.digest, - manifest.bytes) + model.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest.digest, + manifest.bytes) return manifest diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py index 8fd3d32ab..6f2180b7c 100644 --- a/endpoints/v2/tag.py +++ b/endpoints/v2/tag.py @@ -5,7 +5,7 @@ from endpoints.common import parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, paginate from endpoints.v2.errors import NameUnknown from endpoints.decorators import anon_protect -from data.interfaces import v2 +from data.interfaces.v2 import PreOCIModel as model @v2_bp.route('//tags/list', methods=['GET']) @parse_repository_name() @@ -14,11 +14,11 @@ from data.interfaces import v2 @anon_protect @paginate() def list_all_tags(namespace_name, repo_name, limit, offset, pagination_callback): - repo = v2.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if repo is None: raise NameUnknown() - tags = v2.repository_tags(namespace_name, repo_name, limit, offset) + tags = model.repository_tags(namespace_name, repo_name, limit, offset) response = jsonify({ 'name': '{0}/{1}'.format(namespace_name, repo_name), 'tags': [tag.name for tag in tags], diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py index 2398bde99..14e0b93fa 100644 --- a/endpoints/v2/v2auth.py +++ b/endpoints/v2/v2auth.py @@ -11,7 +11,7 @@ from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermissi CreateRepositoryPermission) from endpoints.v2 import v2_bp from endpoints.decorators import anon_protect -from data.interfaces import v2 +from data.interfaces.v2 import PreOCIModel as model from util.cache import no_cache from util.names import parse_namespace_repository, REPOSITORY_NAME_REGEX from util.security.registry_jwt import generate_bearer_token, build_context_and_subject @@ -96,7 +96,7 @@ def generate_registry_jwt(): if user is not None or token is not None: # Lookup the repository. If it exists, make sure the entity has modify # permission. Otherwise, make sure the entity has create permission. - repo = v2.get_repository(namespace, reponame) + repo = model.get_repository(namespace, reponame) if repo: if ModifyRepositoryPermission(namespace, reponame).can(): final_actions.append('push') @@ -105,7 +105,7 @@ def generate_registry_jwt(): else: if CreateRepositoryPermission(namespace).can() and user is not None: logger.debug('Creating repository: %s/%s', namespace, reponame) - v2.create_repository(namespace, reponame, user) + model.create_repository(namespace, reponame, user) final_actions.append('push') else: logger.debug('No permission to create repository %s/%s', namespace, reponame) @@ -113,7 +113,7 @@ def generate_registry_jwt(): if 'pull' in actions: # Grant pull if the user can read the repo or it is public. 
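    # (Summary of the grant logic in generate_registry_jwt, as a sketch:
    #    push + repo exists + ModifyRepositoryPermission          -> grant push
    #    push + repo missing + CreateRepositoryPermission + user  -> create repo, grant push
    #    pull + (ReadRepositoryPermission or public repo)         -> grant pull
    #  Only the surviving actions are embedded in the bearer token returned.)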
if (ReadRepositoryPermission(namespace, reponame).can() or - v2.repository_is_public(namespace, reponame)): + model.repository_is_public(namespace, reponame)): final_actions.append('pull') else: logger.debug('No permission to pull repository %s/%s', namespace, reponame) diff --git a/image/__init__.py b/image/__init__.py index 1ea1f42bb..e69de29bb 100644 --- a/image/__init__.py +++ b/image/__init__.py @@ -1,111 +0,0 @@ -import tarfile - -from collections import namedtuple -from namedlist import namedlist - -from util.registry.gzipwrap import GzipWrap - - -class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])): - """ - ManifestJSON represents a Manifest of any format. - """ - -class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])): - """ - RepositoryReference represents a reference to a Repository, without its full metadata. - """ - - -class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', - 'is_public'])): - """ - Repository represents a collection of tags. - """ - - -class Tag(namedtuple('Tag', ['name', 'repository'])): - """ - Tag represents a user-facing alias for referencing a set of Manifests. - """ - - -class BlobUpload(namedlist('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count', - 'chunk_count', 'sha_state', 'location_name', - 'storage_metadata', 'piece_sha_state', 'piece_hashes', - 'repo_namespace_name', 'repo_name'])): - """ - BlobUpload represents the current state of an Blob being uploaded. - """ - - -class Blob(namedtuple('Blob', ['uuid', 'digest', 'size', 'locations'])): - """ - Blob represents an opaque binary blob saved to the storage system. - """ - - -class TarImageFormatter(object): - """ - Base class for classes which produce a tar containing image and layer data. - """ - - def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator, get_image_json): - """ - Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's - implementation. - """ - return GzipWrap(self.stream_generator(namespace, repository, tag, - synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator, - get_image_json)) - - def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator, get_image_json): - raise NotImplementedError - - def tar_file(self, name, contents, mtime=None): - """ - Returns the tar binary representation for a file with the given name and file contents. - """ - length = len(contents) - tar_data = self.tar_file_header(name, length, mtime=mtime) - tar_data += contents - tar_data += self.tar_file_padding(length) - return tar_data - - def tar_file_padding(self, length): - """ - Returns tar file padding for file data of the given length. - """ - if length % 512 != 0: - return '\0' * (512 - (length % 512)) - - return '' - - def tar_file_header(self, name, file_size, mtime=None): - """ - Returns tar file header data for a file with the given name and size. - """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.REGTYPE - info.size = file_size - - if mtime is not None: - info.mtime = mtime - return info.tobuf() - - def tar_folder(self, name, mtime=None): - """ - Returns tar file header data for a folder with the given name. 
- """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.DIRTYPE - - if mtime is not None: - info.mtime = mtime - - # allow the directory to be readable by non-root users - info.mode = 0755 - return info.tobuf() diff --git a/image/appc/__init__.py b/image/appc/__init__.py index 592825e43..e26f0d3e6 100644 --- a/image/appc/__init__.py +++ b/image/appc/__init__.py @@ -6,7 +6,7 @@ from uuid import uuid4 from app import app from util.registry.streamlayerformat import StreamLayerMerger -from image import TarImageFormatter +from image.common import TarImageFormatter ACNAME_REGEX = re.compile(r'[^a-z-]+') diff --git a/image/common.py b/image/common.py new file mode 100644 index 000000000..28b628abf --- /dev/null +++ b/image/common.py @@ -0,0 +1,68 @@ +import tarfile +from util.registry.gzipwrap import GzipWrap + + +class TarImageFormatter(object): + """ + Base class for classes which produce a tar containing image and layer data. + """ + + def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator, get_image_json): + """ + Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's + implementation. + """ + return GzipWrap(self.stream_generator(namespace, repository, tag, + synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator, + get_image_json)) + + def stream_generator(self, namespace, repository, tag, synthetic_image_id, + layer_json, get_image_iterator, get_layer_iterator, get_image_json): + raise NotImplementedError + + def tar_file(self, name, contents, mtime=None): + """ + Returns the tar binary representation for a file with the given name and file contents. + """ + length = len(contents) + tar_data = self.tar_file_header(name, length, mtime=mtime) + tar_data += contents + tar_data += self.tar_file_padding(length) + return tar_data + + def tar_file_padding(self, length): + """ + Returns tar file padding for file data of the given length. + """ + if length % 512 != 0: + return '\0' * (512 - (length % 512)) + + return '' + + def tar_file_header(self, name, file_size, mtime=None): + """ + Returns tar file header data for a file with the given name and size. + """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.REGTYPE + info.size = file_size + + if mtime is not None: + info.mtime = mtime + return info.tobuf() + + def tar_folder(self, name, mtime=None): + """ + Returns tar file header data for a folder with the given name. 
+ """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.DIRTYPE + + if mtime is not None: + info.mtime = mtime + + # allow the directory to be readable by non-root users + info.mode = 0755 + return info.tobuf() diff --git a/image/docker/squashed.py b/image/docker/squashed.py index d3c886185..bf209eb1e 100644 --- a/image/docker/squashed.py +++ b/image/docker/squashed.py @@ -4,7 +4,7 @@ import math import calendar from app import app -from image import TarImageFormatter +from image.common import TarImageFormatter from util.registry.gzipwrap import GZIP_BUFFER_SIZE from util.registry.streamlayerformat import StreamLayerMerger From 2e5a94bc0b6c40d536e2c09b02f8fafcc0ef034c Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 31 Aug 2016 14:31:43 -0400 Subject: [PATCH 28/34] create key server data interface --- data/interfaces/key_server.py | 122 ++++++++++++++++++ .../{key_server.py => keyserver/__init__.py} | 31 ++--- test/test_endpoints.py | 6 +- web.py | 2 +- 4 files changed, 140 insertions(+), 21 deletions(-) create mode 100644 data/interfaces/key_server.py rename endpoints/{key_server.py => keyserver/__init__.py} (84%) diff --git a/data/interfaces/key_server.py b/data/interfaces/key_server.py new file mode 100644 index 000000000..81cf43312 --- /dev/null +++ b/data/interfaces/key_server.py @@ -0,0 +1,122 @@ +from collections import namedtuple + +import data.model + + + +class ServiceKey(namedtuple('ServiceKey', ['name', 'kid', 'service', 'jwk', 'metadata', + 'created_date', 'expiration_date', 'rotation_duration', + 'approval'])): + """ + Service Key represents a public key (JWK) being used by an instance of a particular service to + authenticate with other services. + """ + pass + + +class ServiceKeyException(Exception): + pass + + +class ServiceKeyDoesNotExist(ServiceKeyException): + pass + + +# TODO(jzelinskie): make this interface support superuser API +class KeyServerDataInterface(object): + """ + Interface that represents all data store interactions required by a JWT key service. + """ + + @classmethod + def list_service_keys(cls, service): + """ + Returns a list of service keys or an empty list if the service does not exist. + """ + raise NotImplementedError() + + @classmethod + def get_service_key(cls, signer_kid, service=None, alive_only=None, approved_only=None): + """ + Returns a service kid with the given kid or raises ServiceKeyNotFound. + """ + raise NotImplementedError() + + @classmethod + def create_service_key(cls, name, kid, service, jwk, metadata, expiration_date, + rotation_duration=None): + """ + Stores a service key. + """ + raise NotImplementedError() + + @classmethod + def replace_service_key(cls, old_kid, kid, jwk, metadata, expiration_date): + """ + Replaces a service with a new key or raises ServiceKeyNotFound. + """ + raise NotImplementedError() + + @classmethod + def delete_service_key(cls, kid): + """ + Deletes and returns a service key with the given kid or raises ServiceKeyNotFound. + """ + raise NotImplementedError() + + +class PreOCIModel(KeyServerDataInterface): + """ + PreOCIModel implements the data model for JWT key service using a database schema before it was + changed to support the OCI specification. + """ + @classmethod + def _db_key_to_servicekey(cls, key): + """ + Converts the database model of a service key into a ServiceKey. 
+ """ + return ServiceKey( + name=key.name, + kid=key.kid, + service=key.service, + jwk=key.jwk, + metadata=key.metadata, + created_date=key.created_date, + expiration_date=key.expiration_date, + rotation_duration=key.rotation_duration, + approval=key.approval, + ) + + @classmethod + def list_service_keys(cls, service): + return data.model.service_keys.list_service_keys(service) + + @classmethod + def get_service_key(cls, signer_kid, service=None, alive_only=True, approved_only=True): + try: + key = data.model.service_keys.get_service_key(signer_kid, service, alive_only, approved_only) + return cls._db_key_to_servicekey(key) + except data.model.ServiceKeyDoesNotExist: + raise ServiceKeyDoesNotExist() + + @classmethod + def create_service_key(cls, name, kid, service, jwk, metadata, expiration_date, + rotation_duration=None): + key = data.model.service_keys.create_service_key(name, kid, service, jwk, metadata, + expiration_date, rotation_duration) + return cls._db_key_to_servicekey(key) + + @classmethod + def replace_service_key(cls, old_kid, kid, jwk, metadata, expiration_date): + try: + data.model.service_keys.replace_service_key(old_kid, kid, jwk, metadata, expiration_date) + except data.model.ServiceKeyDoesNotExist: + raise ServiceKeyDoesNotExist() + + @classmethod + def delete_service_key(cls, kid): + try: + key = data.model.service_keys.delete_service_key(kid) + return cls._db_key_to_servicekey(key) + except data.model.ServiceKeyDoesNotExist: + raise ServiceKeyDoesNotExist() diff --git a/endpoints/key_server.py b/endpoints/keyserver/__init__.py similarity index 84% rename from endpoints/key_server.py rename to endpoints/keyserver/__init__.py index 70c0da0eb..b5e74b171 100644 --- a/endpoints/key_server.py +++ b/endpoints/keyserver/__init__.py @@ -4,11 +4,9 @@ from datetime import datetime, timedelta from flask import Blueprint, jsonify, abort, request, make_response from jwt import get_unverified_header -import data.model -import data.model.service_keys -from data.model.log import log_action - from app import app +from data.interfaces.key_server import PreOCIModel as model, ServiceKeyDoesNotExist +from data.model.log import log_action from util.security import jwtutil @@ -38,7 +36,7 @@ def _validate_jwt(encoded_jwt, jwk, service): try: jwtutil.decode(encoded_jwt, public_key, algorithms=['RS256'], - audience=JWT_AUDIENCE, issuer=service) + audience=JWT_AUDIENCE, issuer=service) except jwtutil.InvalidTokenError: logger.exception('JWT validation failure') abort(400) @@ -55,23 +53,22 @@ def _signer_kid(encoded_jwt, allow_none=False): def _lookup_service_key(service, signer_kid, approved_only=True): try: - return data.model.service_keys.get_service_key(signer_kid, service=service, - approved_only=approved_only) - except data.model.ServiceKeyDoesNotExist: + return model.get_service_key(signer_kid, service=service, approved_only=approved_only) + except ServiceKeyDoesNotExist: abort(403) @key_server.route('/services//keys', methods=['GET']) def list_service_keys(service): - keys = data.model.service_keys.list_service_keys(service) + keys = model.list_service_keys(service) return jsonify({'keys': [key.jwk for key in keys]}) @key_server.route('/services//keys/', methods=['GET']) def get_service_key(service, kid): try: - key = data.model.service_keys.get_service_key(kid, alive_only=False, approved_only=False) - except data.model.ServiceKeyDoesNotExist: + key = model.get_service_key(kid, alive_only=False, approved_only=False) + except ServiceKeyDoesNotExist: abort(404) if key.approval is None: @@ 
-119,8 +116,8 @@ def put_service_key(service, kid): if kid == signer_kid or signer_kid is None: # The key is self-signed. Create a new instance and await approval. _validate_jwt(encoded_jwt, jwk, service) - data.model.service_keys.create_service_key('', kid, service, jwk, metadata, expiration_date, - rotation_duration=rotation_duration) + model.create_service_key('', kid, service, jwk, metadata, expiration_date, + rotation_duration=rotation_duration) key_log_metadata = { 'kid': kid, @@ -143,8 +140,8 @@ def put_service_key(service, kid): _validate_jwt(encoded_jwt, signer_jwk, service) try: - data.model.service_keys.replace_service_key(signer_key.kid, kid, jwk, metadata, expiration_date) - except data.model.ServiceKeyDoesNotExist: + model.replace_service_key(signer_key.kid, kid, jwk, metadata, expiration_date) + except ServiceKeyDoesNotExist: abort(404) key_log_metadata = { @@ -180,8 +177,8 @@ def delete_service_key(service, kid): _validate_jwt(encoded_jwt, signer_key.jwk, service) try: - data.model.service_keys.delete_service_key(kid) - except data.model.ServiceKeyDoesNotExist: + model.delete_service_key(kid) + except ServiceKeyDoesNotExist: abort(404) key_log_metadata = { diff --git a/test/test_endpoints.py b/test/test_endpoints.py index d8430375b..a139ee575 100644 --- a/test/test_endpoints.py +++ b/test/test_endpoints.py @@ -18,7 +18,7 @@ from jwkest.jwk import RSAKey from app import app from data import model from data.database import ServiceKeyApprovalType -from endpoints import key_server +from endpoints import keyserver from endpoints.api import api, api_bp from endpoints.api.user import Signin from endpoints.web import web as web_bp @@ -28,7 +28,7 @@ from test.helpers import assert_action_logged try: app.register_blueprint(web_bp, url_prefix='') - app.register_blueprint(key_server.key_server, url_prefix='') + app.register_blueprint(keyserver.key_server, url_prefix='') except ValueError: # This blueprint was already registered pass @@ -355,7 +355,7 @@ class KeyServerTestCase(EndpointTestCase): def _get_test_jwt_payload(self): return { 'iss': 'sample_service', - 'aud': key_server.JWT_AUDIENCE, + 'aud': keyserver.JWT_AUDIENCE, 'exp': int(time.time()) + 60, 'iat': int(time.time()), 'nbf': int(time.time()), diff --git a/web.py b/web.py index b33c76383..237829c0f 100644 --- a/web.py +++ b/web.py @@ -7,7 +7,7 @@ from endpoints.api import api_bp from endpoints.bitbuckettrigger import bitbuckettrigger from endpoints.githubtrigger import githubtrigger from endpoints.gitlabtrigger import gitlabtrigger -from endpoints.key_server import key_server +from endpoints.keyserver import key_server from endpoints.oauthlogin import oauthlogin from endpoints.realtime import realtime from endpoints.web import web From c35413d4f61276edf8a2c6f2f7cde2bd31f36b57 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 31 Aug 2016 15:18:01 -0400 Subject: [PATCH 29/34] add boilerplate for verbs data interface --- data/interfaces/verbs.py | 18 ++++++++++++++++++ endpoints/verbs.py | 5 +++-- 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 data/interfaces/verbs.py diff --git a/data/interfaces/verbs.py b/data/interfaces/verbs.py new file mode 100644 index 000000000..ba7af25f7 --- /dev/null +++ b/data/interfaces/verbs.py @@ -0,0 +1,18 @@ +class VerbsDataInterface(object): + """ + Interface that represents all data store interactions required by the registry's custom HTTP + verbs. 
+ """ + @classmethod + def repository_is_public(cls, namespace_name, repo_name): + """ + Returns a boolean for whether the repository with the given name and namespace is public. + """ + raise NotImplementedError() + + +class PreOCIModel(VerbsDataInterface): + """ + PreOCIModel implements the data model for the registry's custom HTTP verbs using a database schema + before it was changed to support the OCI specification. + """ diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 933fc9b0a..1f0124a38 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -31,8 +31,9 @@ logger = logging.getLogger(__name__) def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, image_json, repo_image, handlers): - """ This method generates a stream of data which will be replicated and read from the queue files. - This method runs in a separate process. + """ + This method generates a stream of data which will be replicated and read from the queue files. + This method runs in a separate process. """ # For performance reasons, we load the full image list here, cache it, then disconnect from # the database. From 783c9e7a7396d55890b00126b14dd7a15ab1960f Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Wed, 31 Aug 2016 16:08:01 -0400 Subject: [PATCH 30/34] stop exporting experimental database models --- data/database.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/data/database.py b/data/database.py index 0e724c6b0..9726adf5b 100644 --- a/data/database.py +++ b/data/database.py @@ -1173,5 +1173,12 @@ class BitTorrentPieces(BaseModel): ) -is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel +beta_classes = set([ManifestLayerScan, Tag, BlobPlacementLocation, ManifestLayer, ManifestList, + BitTorrentPieces, MediaType, Label, ManifestBlob, BlobUploading, Blob, + ManifestLayerDockerV1, BlobPlacementLocationPreference, ManifestListManifest, + Manifest, DerivedImage, BlobPlacement]) +is_model = lambda x: (inspect.isclass(x) and + issubclass(x, BaseModel) and + x is not BaseModel and + x not in beta_classes) all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)] From 3c8b87e0861f77c59d5010c9574f30614495ae97 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 1 Sep 2016 19:00:11 -0400 Subject: [PATCH 31/34] Fix verbs in manifestlist All registry_tests now pass --- data/database.py | 10 +- data/interfaces/common.py | 12 - data/interfaces/v1.py | 31 +-- data/interfaces/v2.py | 92 ++++--- data/interfaces/verbs.py | 304 ++++++++++++++++++++++ data/model/image.py | 1 - endpoints/building.py | 10 +- endpoints/v1/registry.py | 5 +- endpoints/v2/manifest.py | 14 +- endpoints/{verbs.py => verbs/__init__.py} | 197 ++++++-------- image/appc/__init__.py | 14 +- image/common.py | 15 +- image/docker/schema1.py | 6 +- image/docker/squashed.py | 17 +- test/registry_tests.py | 2 +- test/test_manifests.py | 16 +- util/secscan/analyzer.py | 10 +- util/secscan/notifier.py | 8 +- 18 files changed, 517 insertions(+), 247 deletions(-) delete mode 100644 data/interfaces/common.py rename endpoints/{verbs.py => verbs/__init__.py} (62%) diff --git a/data/database.py b/data/database.py index 9726adf5b..d94c42b6f 100644 --- a/data/database.py +++ b/data/database.py @@ -957,7 +957,7 @@ class ServiceKey(BaseModel): rotation_duration = IntegerField(null=True) approval = ForeignKeyField(ServiceKeyApproval, null=True) - +''' class MediaType(BaseModel): """ MediaType is an enumeration of the possible formats of various 
objects in the data model. """ name = CharField(index=True, unique=True) @@ -1122,6 +1122,7 @@ class ManifestLayerScan(BaseModel): class DerivedImage(BaseModel): """ DerivedImage represents a Manifest transcoded into an alternative format. """ + uuid = CharField(default=uuid_generator, unique=True) source_manifest = ForeignKeyField(Manifest) derived_manifest_json = JSONField() media_type = ForeignKeyField(MediaType) @@ -1177,8 +1178,7 @@ beta_classes = set([ManifestLayerScan, Tag, BlobPlacementLocation, ManifestLayer BitTorrentPieces, MediaType, Label, ManifestBlob, BlobUploading, Blob, ManifestLayerDockerV1, BlobPlacementLocationPreference, ManifestListManifest, Manifest, DerivedImage, BlobPlacement]) -is_model = lambda x: (inspect.isclass(x) and - issubclass(x, BaseModel) and - x is not BaseModel and - x not in beta_classes) +''' + +is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)] diff --git a/data/interfaces/common.py b/data/interfaces/common.py deleted file mode 100644 index f0812515c..000000000 --- a/data/interfaces/common.py +++ /dev/null @@ -1,12 +0,0 @@ -from image import Repository -from data import model - -def repository_for_repo(repo): - """ Returns a Repository object representing the repo data model instance given. """ - return Repository( - id=repo.id, - name=repo.name, - namespace_name=repo.namespace_user.username, - description=repo.description, - is_public=model.repository.is_repository_public(repo) - ) diff --git a/data/interfaces/v1.py b/data/interfaces/v1.py index 214ffee2c..a9e29dc89 100644 --- a/data/interfaces/v1.py +++ b/data/interfaces/v1.py @@ -1,8 +1,7 @@ -from collections import namedtuple - from app import app, storage as store from data import model from data.model import db_transaction +from collections import namedtuple from util.morecollections import AttrDict @@ -13,19 +12,6 @@ class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'desc """ -def _repository_for_repo(repo): - """ - Returns a Repository object representing the repo data model instance given. - """ - return Repository( - id=repo.id, - name=repo.name, - namespace_name=repo.namespace_user.username, - description=repo.description, - is_public=model.repository.is_repository_public(repo) - ) - - class DockerRegistryV1DataInterface(object): """ Interface that represents all data store interactions required by a Docker Registry v1. @@ -409,12 +395,23 @@ class PreOCIModel(DockerRegistryV1DataInterface): def change_user_password(cls, user, new_password): model.user.change_password(user, new_password) + @classmethod + def _repository_for_repo(cls, repo): + """ Returns a Repository object representing the repo data model instance given. 
""" + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) + @classmethod def get_repository(cls, namespace_name, repo_name): repo = model.repository.get_repository(namespace_name, repo_name) if repo is None: return None - return _repository_for_repo(repo) + return cls._repository_for_repo(repo) @classmethod def create_repository(cls, namespace_name, repo_name, user=None): @@ -432,4 +429,4 @@ class PreOCIModel(DockerRegistryV1DataInterface): def get_sorted_matching_repositories(cls, search_term, only_public, can_read, limit): repos = model.repository.get_sorted_matching_repositories(search_term, only_public, can_read, limit=limit) - return [_repository_for_repo(repo) for repo in repos] + return [cls._repository_for_repo(repo) for repo in repos] diff --git a/data/interfaces/v2.py b/data/interfaces/v2.py index 5ef48798a..891fe08fd 100644 --- a/data/interfaces/v2.py +++ b/data/interfaces/v2.py @@ -7,10 +7,15 @@ from data import model, database from data.model import DataModelException from image.docker.v1 import DockerV1Metadata - _MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws" +class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', + 'is_public'])): + """ + Repository represents a namespaced collection of tags. + """ + class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])): """ ManifestJSON represents a Manifest of any format. @@ -44,47 +49,6 @@ class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'name """ -class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', - 'is_public'])): - """ - Repository represents a namespaced collection of tags. - """ - - -def _repository_for_repo(repo): - """ - Returns a Repository object representing the repo data model instance given. - """ - return Repository( - id=repo.id, - name=repo.name, - namespace_name=repo.namespace_user.username, - description=repo.description, - is_public=model.repository.is_repository_public(repo) - ) - - -def _docker_v1_metadata(namespace_name, repo_name, repo_image): - """ - Returns a DockerV1Metadata object for the given image under the repository with the given - namespace and name. Note that the namespace and name are passed here as an optimization, and are - *not checked* against the image. - """ - return DockerV1Metadata( - namespace_name=namespace_name, - repo_name=repo_name, - image_id=repo_image.docker_image_id, - checksum=repo_image.v1_checksum, - content_checksum=repo_image.storage.content_checksum, - compat_json=repo_image.v1_json_metadata, - created=repo_image.created, - comment=repo_image.comment, - command=repo_image.command, - # TODO: make sure this isn't needed anywhere, as it is expensive to lookup - parent_image_id=None, - ) - - class DockerRegistryV2DataInterface(object): """ Interface that represents all data store interactions required by a Docker Registry v1. @@ -303,12 +267,23 @@ class PreOCIModel(DockerRegistryV2DataInterface): def repository_is_public(cls, namespace_name, repo_name): return model.repository.repository_is_public(namespace_name, repo_name) + @classmethod + def _repository_for_repo(cls, repo): + """ Returns a Repository object representing the repo data model instance given. 
""" + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) + @classmethod def get_repository(cls, namespace_name, repo_name): repo = model.repository.get_repository(namespace_name, repo_name) if repo is None: return None - return _repository_for_repo(repo) + return cls._repository_for_repo(repo) @classmethod def has_active_tag(cls, namespace_name, repo_name, tag_name): @@ -349,11 +324,32 @@ class PreOCIModel(DockerRegistryV2DataInterface): tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, digest) return [_tag_view(tag) for tag in tags] + @classmethod + def _docker_v1_metadata(cls, namespace_name, repo_name, repo_image): + """ + Returns a DockerV1Metadata object for the given image under the repository with the given + namespace and name. Note that the namespace and name are passed here as an optimization, and are + *not checked* against the image. + """ + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=repo_image.docker_image_id, + checksum=repo_image.v1_checksum, + content_checksum=repo_image.storage.content_checksum, + compat_json=repo_image.v1_json_metadata, + created=repo_image.created, + comment=repo_image.comment, + command=repo_image.command, + # TODO: make sure this isn't needed anywhere, as it is expensive to lookup + parent_image_id=None, + ) + @classmethod def get_docker_v1_metadata_by_tag(cls, namespace_name, repo_name, tag_name): try: repo_img = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) - return _docker_v1_metadata(namespace_name, repo_name, repo_img) + return cls._docker_v1_metadata(namespace_name, repo_name, repo_img) except DataModelException: return None @@ -364,7 +360,7 @@ class PreOCIModel(DockerRegistryV2DataInterface): return {} images_query = model.image.lookup_repository_images(repo, docker_image_ids) - return {image.docker_image_id: _docker_v1_metadata(namespace_name, repo_name, image) + return {image.docker_image_id: cls._docker_v1_metadata(namespace_name, repo_name, image) for image in images_query} @classmethod @@ -374,7 +370,7 @@ class PreOCIModel(DockerRegistryV2DataInterface): return [] parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) - return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents] + return [cls._docker_v1_metadata(namespace_name, repo_name, image) for image in parents] @classmethod def create_manifest_and_update_tag(cls, namespace_name, repo_name, tag_name, manifest_digest, @@ -406,7 +402,7 @@ class PreOCIModel(DockerRegistryV2DataInterface): repo_image = model.image.synthesize_v1_image(repo, storage_obj, image_id, created, comment, command, compat_json, parent_image) - return _docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image) + return cls._docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image) @classmethod def save_manifest(cls, namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest, @@ -434,7 +430,7 @@ class PreOCIModel(DockerRegistryV2DataInterface): def get_visible_repositories(cls, username, limit, offset): query = model.repository.get_visible_repositories(username, include_public=(username is None)) query = query.limit(limit).offset(offset) - return [_repository_for_repo(repo) for repo in query] + return [cls._repository_for_repo(repo) for repo in query] @classmethod def 
create_blob_upload(cls, namespace_name, repo_name, upload_uuid, location_name,
diff --git a/data/interfaces/verbs.py b/data/interfaces/verbs.py
index ba7af25f7..826b1729b 100644
--- a/data/interfaces/verbs.py
+++ b/data/interfaces/verbs.py
@@ -1,3 +1,36 @@
+from collections import namedtuple
+from data import model
+from image.docker.v1 import DockerV1Metadata
+
+import json
+
+class DerivedImage(namedtuple('DerivedImage', ['ref', 'blob', 'internal_source_image_db_id'])):
+  """
+  DerivedImage represents a user-facing alias for an image which was derived from another image.
+  """
+
+class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])):
+  """
+  RepositoryReference represents a reference to a Repository, without its full metadata.
+  """
+
+class ImageWithBlob(namedtuple('Image', ['image_id', 'blob', 'compat_metadata', 'repository',
+                                         'internal_db_id', 'v1_metadata'])):
+  """
+  ImageWithBlob represents a user-facing alias for referencing an image, along with its blob.
+  """
+
+class Blob(namedtuple('Blob', ['uuid', 'size', 'uncompressed_size', 'uploading', 'locations'])):
+  """
+  Blob represents an opaque binary blob saved to the storage system.
+  """
+
+class TorrentInfo(namedtuple('TorrentInfo', ['piece_length', 'pieces'])):
+  """
+  TorrentInfo represents the torrent piece information associated with a blob.
+  """
+
+
 class VerbsDataInterface(object):
   """
   Interface that represents all data store interactions required by the registry's custom HTTP
@@ -10,9 +43,280 @@ class VerbsDataInterface(object):
     """
     raise NotImplementedError()
 
+  @classmethod
+  def get_manifest_layers_with_blobs(cls, repo_image):
+    """
+    Returns the full set of manifest layers and their associated blobs starting at the given
+    repository image and working upwards to the root image.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def get_blob_path(cls, blob):
+    """
+    Returns the storage path for the given blob.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def get_derived_image_signature(cls, derived_image, signer_name):
+    """
+    Returns the signature associated with the derived image and a specific signer or None if none.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def set_derived_image_signature(cls, derived_image, signer_name, signature):
+    """
+    Sets the calculated signature for the given derived image and signer to that specified.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def delete_derived_image(cls, derived_image):
+    """
+    Deletes a derived image and all of its storage.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def set_blob_size(cls, blob, size):
+    """
+    Sets the size field on a blob to the value specified.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def get_repo_blob_by_digest(cls, namespace_name, repo_name, digest):
+    """
+    Returns the blob with the given digest under the matching repository or None if none.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def get_torrent_info(cls, blob):
+    """
+    Returns the torrent information associated with the given blob or None if none.
+    """
+    raise NotImplementedError()
+
+  @classmethod
+  def set_torrent_info(cls, blob, piece_length, pieces):
+    """
+    Sets the torrent information associated with the given blob to that specified.
+ """ + raise NotImplementedError() + + @classmethod + def lookup_derived_image(cls, repo_image, verb, varying_metadata=None): + """ + Looks up the derived image for the given repository image, verb and optional varying metadata + and returns it or None if none. + """ + raise NotImplementedError() + + @classmethod + def lookup_or_create_derived_image(cls, repo_image, verb, location, varying_metadata=None): + """ + Looks up the derived image for the given repository image, verb and optional varying metadata + and returns it. If none exists, a new derived image is created. + """ + raise NotImplementedError() + + @classmethod + def get_tag_image(cls, namespace_name, repo_name, tag_name): + """ + Returns the image associated with the live tag with the given name under the matching repository + or None if none. + """ + raise NotImplementedError() + class PreOCIModel(VerbsDataInterface): """ PreOCIModel implements the data model for the registry's custom HTTP verbs using a database schema before it was changed to support the OCI specification. """ + + @classmethod + def repository_is_public(cls, namespace_name, repo_name): + return model.repository.repository_is_public(namespace_name, repo_name) + + @classmethod + def _docker_v1_metadata(cls, namespace_name, repo_name, repo_image): + """ + Returns a DockerV1Metadata object for the given image under the repository with the given + namespace and name. Note that the namespace and name are passed here as an optimization, and are + *not checked* against the image. Also note that we only fill in the localized data needed by + verbs. + """ + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=repo_image.docker_image_id, + checksum=repo_image.v1_checksum, + compat_json=repo_image.v1_json_metadata, + created=repo_image.created, + comment=repo_image.comment, + command=repo_image.command, + + # Note: These are not needed in verbs and are expensive to load, so we just skip them. 
+ content_checksum=None, + parent_image_id=None, + ) + + @classmethod + def get_manifest_layers_with_blobs(cls, repo_image): + repo_image_record = model.image.get_image_by_id(repo_image.repository.namespace_name, + repo_image.repository.name, + repo_image.image_id) + + parents = model.image.get_parent_images_with_placements(repo_image.repository.namespace_name, + repo_image.repository.name, + repo_image_record) + + yield repo_image + + for parent in parents: + metadata = {} + try: + metadata = json.loads(parent.v1_json_metadata) + except ValueError: + pass + + yield ImageWithBlob( + image_id=parent.docker_image_id, + blob=cls._blob(parent.storage), + repository=repo_image.repository, + compat_metadata=metadata, + v1_metadata=cls._docker_v1_metadata(repo_image.repository.namespace_name, + repo_image.repository.name, parent), + internal_db_id=parent.id, + ) + + @classmethod + def get_derived_image_signature(cls, derived_image, signer_name): + storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) + signature_entry = model.storage.lookup_storage_signature(storage, signer_name) + if signature_entry is None: + return None + + return signature_entry.signature + + @classmethod + def set_derived_image_signature(cls, derived_image, signer_name, signature): + storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) + signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name) + signature_entry.signature = signature + signature_entry.uploading = False + signature_entry.save() + + @classmethod + def delete_derived_image(cls, derived_image): + model.image.delete_derived_storage_by_uuid(derived_image.blob.uuid) + + @classmethod + def set_blob_size(cls, blob, size): + storage_entry = model.storage.get_storage_by_uuid(blob.uuid) + storage_entry.image_size = size + storage_entry.uploading = False + storage_entry.save() + + @classmethod + def get_blob_path(cls, blob): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + return model.storage.get_layer_path(blob_record) + + @classmethod + def get_repo_blob_by_digest(cls, namespace_name, repo_name, digest): + try: + blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) + except model.BlobDoesNotExist: + return None + + return cls._blob(blob_record) + + @classmethod + def get_torrent_info(cls, blob): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + + try: + torrent_info = model.storage.get_torrent_info(blob_record) + except model.TorrentInfoDoesNotExist: + return None + + return TorrentInfo( + pieces=torrent_info.pieces, + piece_length=torrent_info.piece_length, + ) + + @classmethod + def set_torrent_info(cls, blob, piece_length, pieces): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + model.storage.save_torrent_info(blob_record, piece_length, pieces) + + @classmethod + def lookup_derived_image(cls, repo_image, verb, varying_metadata=None): + blob_record = model.image.find_derived_storage_for_image(repo_image.internal_db_id, verb, + varying_metadata) + if blob_record is None: + return None + + return cls._derived_image(blob_record, repo_image) + + @classmethod + def _derived_image(cls, blob_record, repo_image): + return DerivedImage( + ref=repo_image.internal_db_id, + blob=cls._blob(blob_record), + internal_source_image_db_id=repo_image.internal_db_id, + ) + + @classmethod + def _blob(cls, blob_record): + if hasattr(blob_record, 'locations'): + locations = blob_record.locations + else: + locations = 
model.storage.get_storage_locations(blob_record.uuid) + + return Blob( + uuid=blob_record.uuid, + size=blob_record.image_size, + uncompressed_size=blob_record.uncompressed_size, + uploading=blob_record.uploading, + locations=locations, + ) + + @classmethod + def lookup_or_create_derived_image(cls, repo_image, verb, location, varying_metadata=None): + blob_record = model.image.find_or_create_derived_storage(repo_image.internal_db_id, verb, location, + varying_metadata) + return cls._derived_image(blob_record, repo_image) + + @classmethod + def get_tag_image(cls, namespace_name, repo_name, tag_name): + try: + found = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) + except model.DataModelException: + return None + + metadata = {} + try: + metadata = json.loads(found.v1_json_metadata) + except ValueError: + pass + + return ImageWithBlob( + image_id=found.docker_image_id, + blob=cls._blob(found.storage), + repository=RepositoryReference( + namespace_name=namespace_name, + name=repo_name, + id=found.repository_id, + ), + compat_metadata=metadata, + v1_metadata=cls._docker_v1_metadata(namespace_name, repo_name, found), + internal_db_id=found.id, + ) + + diff --git a/data/model/image.py b/data/model/image.py index b636c64fe..2fb46842c 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -513,7 +513,6 @@ def find_or_create_derived_storage(source_image, transformation_name, preferred_ if existing is not None: return existing - logger.debug('Creating storage dervied from source image: %s', source_image.id) uniqueness_hash = _get_uniqueness_hash(varying_metadata) trans = ImageStorageTransformation.get(name=transformation_name) new_storage = storage.create_v1_storage(preferred_location) diff --git a/endpoints/building.py b/endpoints/building.py index 93961bfc8..977a964a3 100644 --- a/endpoints/building.py +++ b/endpoints/building.py @@ -9,7 +9,7 @@ from data.database import db from auth.auth_context import get_authenticated_user from endpoints.notificationhelper import spawn_notification from util.names import escape_tag - +from util.morecollections import AttrDict logger = logging.getLogger(__name__) @@ -72,7 +72,13 @@ def start_build(repository, prepared_build, pull_robot_name=None): model.log.log_action('build_dockerfile', repository.namespace_user.username, ip=request.remote_addr, metadata=event_log_metadata, repository=repository) - spawn_notification(repository, 'build_queued', event_log_metadata, + # TODO(jzelinskie): remove when more endpoints have been converted to using interfaces + repo = AttrDict({ + 'namespace_name': repository.namespace_user.username, + 'name': repository.name, + }) + + spawn_notification(repo, 'build_queued', event_log_metadata, subpage='build/%s' % build_request.uuid, pathargs=['build', build_request.uuid]) diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 1f169db0a..2e0aa85bc 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -155,6 +155,10 @@ def put_image_layer(namespace, repository, image_id): if model.storage_exists(namespace, repository, image_id): exact_abort(409, 'Image already exists') + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) + if v1_metadata is None: + abort(404) + logger.debug('Storing layer data') input_stream = request.stream @@ -182,7 +186,6 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(piece_hasher.update) # Add a handler which computes the checksum. 
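  # (Assumed behavior, noted for clarity: simple_checksum_handler seeds a SHA-256 with the
  # v1 JSON and then folds in the layer bytes as they stream through, so the legacy
  # checksum is computed without buffering the layer.)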
- v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) h, sum_hndlr = checksums.simple_checksum_handler(v1_metadata.compat_json) sr.add_handler(sum_hndlr) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 9fdbe6ed1..4acc6abc2 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -104,7 +104,7 @@ def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): if manifest.tag != manifest_ref: raise TagInvalid() - return _write_manifest(namespace_name, repo_name, manifest) + return _write_manifest_and_log(namespace_name, repo_name, manifest) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT']) @@ -113,16 +113,16 @@ def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def write_manifest_by_digest(namespace_name, repo_name, digest): +def write_manifest_by_digest(namespace_name, repo_name, manifest_ref): try: manifest = DockerSchema1Manifest(request.data) except ManifestException as me: raise ManifestInvalid(detail={'message': me.message}) - if manifest.digest != digest: + if manifest.digest != manifest_ref: raise ManifestInvalid(detail={'message': 'manifest digest mismatch'}) - return _write_manifest(namespace_name, repo_name, manifest) + return _write_manifest_and_log(namespace_name, repo_name, manifest) def _write_manifest(namespace_name, repo_name, manifest): @@ -178,6 +178,12 @@ def _write_manifest(namespace_name, repo_name, manifest): model.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id, manifest.digest, manifest.bytes) + return repo, storage_map + + +def _write_manifest_and_log(namespace_name, repo_name, manifest): + repo, storage_map = _write_manifest(namespace_name, repo_name, manifest) + # Queue all blob manifests for replication. # TODO(jschorr): Find a way to optimize this insertion. if features.STORAGE_REPLICATION: diff --git a/endpoints/verbs.py b/endpoints/verbs/__init__.py similarity index 62% rename from endpoints/verbs.py rename to endpoints/verbs/__init__.py index 1f0124a38..30f41a9ae 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs/__init__.py @@ -1,5 +1,4 @@ import logging -import json import hashlib from flask import redirect, Blueprint, abort, send_file, make_response, request @@ -10,7 +9,8 @@ from app import app, signer, storage, metric_queue from auth.auth import process_auth from auth.auth_context import get_authenticated_user from auth.permissions import ReadRepositoryPermission -from data import model, database +from data import database +from data.interfaces.verbs import PreOCIModel as model from endpoints.common import route_show_if, parse_repository_name from endpoints.decorators import anon_protect from endpoints.trackhelper import track_and_log @@ -29,8 +29,7 @@ verbs = Blueprint('verbs', __name__) logger = logging.getLogger(__name__) -def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, image_json, repo_image, - handlers): +def _open_stream(formatter, namespace, repository, tag, derived_image_id, repo_image, handlers): """ This method generates a stream of data which will be replicated and read from the queue files. This method runs in a separate process. @@ -38,12 +37,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag # For performance reasons, we load the full image list here, cache it, then disconnect from # the database. 
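  # (UseThenDisconnect opens a connection for the block and is assumed to close it on
  # exit, so the generators defined below never hold a database handle while the
  # stream is consumed.)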
  with database.UseThenDisconnect(app.config):
-    image_list = list(model.image.get_parent_images_with_placements(namespace, repository,
-                                                                    repo_image))
-    image_list.insert(0, repo_image)
-
-  def get_image_json(image):
-    return json.loads(image.v1_json_metadata)
+    image_list = list(model.get_manifest_layers_with_blobs(repo_image))

  def get_next_image():
    for current_image in image_list:
@@ -52,18 +46,16 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag
  def get_next_layer():
    # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3)
    store = Storage(app, metric_queue)
-    for current_image_entry in image_list:
-      current_image_path = model.storage.get_layer_path(current_image_entry.storage)
-      current_image_stream = store.stream_read_file(current_image_entry.storage.locations,
+    for current_image in image_list:
+      current_image_path = model.get_blob_path(current_image.blob)
+      current_image_stream = store.stream_read_file(current_image.blob.locations,
                                                    current_image_path)

-      current_image_id = current_image_entry.id
-      logger.debug('Returning image layer %s (%s): %s', current_image_id,
-                   current_image_entry.docker_image_id, current_image_path)
+      logger.debug('Returning image layer %s: %s', current_image.image_id, current_image_path)
      yield current_image_stream

-  stream = formatter.build_stream(namespace, repository, tag, synthetic_image_id, image_json,
-                                  get_next_image, get_next_layer, get_image_json)
+  stream = formatter.build_stream(namespace, repository, tag, repo_image, derived_image_id,
+                                  get_next_image, get_next_layer)

  for handler_fn in handlers:
    stream = wrap_with_handler(stream, handler_fn)
@@ -71,75 +63,58 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag
  return stream.read


-def _sign_synthetic_image(verb, linked_storage_uuid, queue_file):
+def _sign_derived_image(verb, derived_image, queue_file):
  """ Read from the queue file and sign the contents which are generated. This method runs in a
      separate process. """
  signature = None
  try:
    signature = signer.detached_sign(queue_file)
  except:
-    logger.exception('Exception when signing %s image %s', verb, linked_storage_uuid)
+    logger.exception('Exception when signing %s derived image %s', verb, derived_image.ref)
    return

  # Setup the database (since this is a new process) and then disconnect immediately
  # once the operation completes.
  if not queue_file.raised_exception:
    with database.UseThenDisconnect(app.config):
-      try:
-        derived = model.storage.get_storage_by_uuid(linked_storage_uuid)
-      except model.storage.InvalidImageException:
-        return
-
-      signature_entry = model.storage.find_or_create_storage_signature(derived, signer.name)
-      signature_entry.signature = signature
-      signature_entry.uploading = False
-      signature_entry.save()
+      model.set_derived_image_signature(derived_image, signer.name, signature)


-def _write_synthetic_image_to_storage(verb, linked_storage_uuid, linked_locations, queue_file):
+def _write_derived_image_to_storage(verb, derived_image, queue_file):
  """ Read from the generated stream and write it back to the storage engine. This method runs in a
      separate process.
""" def handle_exception(ex): - logger.debug('Exception when building %s image %s: %s', verb, linked_storage_uuid, ex) + logger.debug('Exception when building %s derived image %s: %s', verb, derived_image.ref, ex) with database.UseThenDisconnect(app.config): - model.image.delete_derived_storage_by_uuid(linked_storage_uuid) + model.delete_derived_image(derived_image) queue_file.add_exception_handler(handle_exception) # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3) store = Storage(app, metric_queue) - image_path = store.v1_image_layer_path(linked_storage_uuid) - store.stream_write(linked_locations, image_path, queue_file) + image_path = model.get_blob_path(derived_image.blob) + store.stream_write(derived_image.blob.locations, image_path, queue_file) queue_file.close() - if not queue_file.raised_exception: - # Setup the database (since this is a new process) and then disconnect immediately - # once the operation completes. - with database.UseThenDisconnect(app.config): - done_uploading = model.storage.get_storage_by_uuid(linked_storage_uuid) - done_uploading.uploading = False - done_uploading.save() - -def _torrent_for_storage(storage_ref, is_public): - """ Returns a response containing the torrent file contents for the given storage. May abort +def _torrent_for_blob(blob, is_public): + """ Returns a response containing the torrent file contents for the given blob. May abort with an error if the state is not valid (e.g. non-public, non-user request). """ # Make sure the storage has a size. - if not storage_ref.image_size: + if not blob.size: abort(404) # Lookup the torrent information for the storage. - try: - torrent_info = model.storage.get_torrent_info(storage_ref) - except model.TorrentInfoDoesNotExist: + torrent_info = model.get_torrent_info(blob) + if torrent_info is None: abort(404) # Lookup the webseed path for the storage. - path = model.storage.get_layer_path(storage_ref) - webseed = storage.get_direct_download_url(storage_ref.locations, path, + path = model.get_blob_path(blob) + webseed = storage.get_direct_download_url(blob.locations, path, expires_in=app.config['BITTORRENT_WEBSEED_LIFETIME']) if webseed is None: # We cannot support webseeds for storages that cannot provide direct downloads. @@ -147,17 +122,17 @@ def _torrent_for_storage(storage_ref, is_public): # Build the filename for the torrent. if is_public: - name = public_torrent_filename(storage_ref.uuid) + name = public_torrent_filename(blob.uuid) else: user = get_authenticated_user() if not user: abort(403) - name = per_user_torrent_filename(user.uuid, storage_ref.uuid) + name = per_user_torrent_filename(user.uuid, blob.uuid) # Return the torrent file. - torrent_file = make_torrent(name, webseed, storage_ref.image_size, - torrent_info.piece_length, torrent_info.pieces) + torrent_file = make_torrent(name, webseed, blob.size, torrent_info.piece_length, + torrent_info.pieces) headers = {'Content-Type': 'application/x-bittorrent', 'Content-Disposition': 'attachment; filename={0}.torrent'.format(name)} @@ -173,60 +148,46 @@ def _torrent_repo_verb(repo_image, tag, verb, **kwargs): # Lookup an *existing* derived storage for the verb. If the verb's image storage doesn't exist, # we cannot create it here, so we 406. 
-  derived = model.image.find_derived_storage_for_image(repo_image, verb,
-                                                       varying_metadata={'tag': tag})
-  if not derived:
+  derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag})
+  if derived_image is None:
    abort(406)

  # Return the torrent.
-  public_repo = model.repository.is_repository_public(repo_image.repository)
-  torrent = _torrent_for_storage(derived, public_repo)
+  public_repo = model.repository_is_public(repo_image.repository.namespace_name,
+                                           repo_image.repository.name)
+  torrent = _torrent_for_blob(derived_image.blob, public_repo)

  # Log the action.
  track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, torrent=True, **kwargs)
-
  return torrent


-def _verify_repo_verb(store, namespace, repository, tag, verb, checker=None):
+def _verify_repo_verb(_, namespace, repository, tag, verb, checker=None):
  permission = ReadRepositoryPermission(namespace, repository)
-
-  if not permission.can() and not model.repository.repository_is_public(namespace, repository):
+  if not permission.can() and not model.repository_is_public(namespace, repository):
    abort(403)

  # Lookup the requested tag.
-  try:
-    tag_image = model.tag.get_tag_image(namespace, repository, tag)
-  except model.DataModelException:
-    abort(404)
-
-  # Lookup the tag's image and storage.
-  repo_image = model.image.get_repo_image_extended(namespace, repository, tag_image.docker_image_id)
-  if not repo_image:
+  tag_image = model.get_tag_image(namespace, repository, tag)
+  if tag_image is None:
    abort(404)

  # If there is a data checker, call it first.
-  image_json = None
-  if checker is not None:
-    image_json = json.loads(repo_image.v1_json_metadata)
-
-    if not checker(image_json):
+  if checker is not None and not checker(tag_image):
    logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb)
    abort(404)

-  return (repo_image, tag_image, image_json)
+  return tag_image


def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwargs):
  # Verify that the image exists and that we have access to it.
-  result = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)
-  (repo_image, _, _) = result
+  repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)

-  # Lookup the derived image storage for the verb.
-  derived = model.image.find_derived_storage_for_image(repo_image, verb,
-                                                       varying_metadata={'tag': tag})
-  if derived is None or derived.uploading:
+  # Lookup the derived image for the verb.
+  derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag})
+  if derived_image is None or derived_image.blob.uploading:
    return make_response('', 202)

  # Check if we have a valid signer configured.
@@ -234,18 +195,17 @@ def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwarg
    abort(404)

  # Lookup the signature for the verb.
-  signature_entry = model.storage.lookup_storage_signature(derived, signer.name)
-  if signature_entry is None:
+  signature_value = model.get_derived_image_signature(derived_image, signer.name)
+  if signature_value is None:
    abort(404)

  # Return the signature.
-  return make_response(signature_entry.signature)
+  return make_response(signature_value)


def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=None, **kwargs):
  # Verify that the image exists and that we have access to it.
- result = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) - (repo_image, tag_image, image_json) = result + repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) # Check for torrent. If found, we return a torrent for the repo verb image (if the derived # image already exists). @@ -257,36 +217,30 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, **kwargs) metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb]) - # Lookup/create the derived image storage for the verb and repo image. - derived = model.image.find_or_create_derived_storage(repo_image, verb, + # Lookup/create the derived image for the verb and repo image. + derived_image = model.lookup_or_create_derived_image(repo_image, verb, storage.preferred_locations[0], varying_metadata={'tag': tag}) - - if not derived.uploading: - logger.debug('Derived %s image %s exists in storage', verb, derived.uuid) - derived_layer_path = model.storage.get_layer_path(derived) + if not derived_image.blob.uploading: + logger.debug('Derived %s image %s exists in storage', verb, derived_image.ref) + derived_layer_path = model.get_blob_path(derived_image.blob) is_head_request = request.method == 'HEAD' - download_url = storage.get_direct_download_url(derived.locations, derived_layer_path, + download_url = storage.get_direct_download_url(derived_image.blob.locations, derived_layer_path, head=is_head_request) if download_url: - logger.debug('Redirecting to download URL for derived %s image %s', verb, derived.uuid) + logger.debug('Redirecting to download URL for derived %s image %s', verb, derived_image.ref) return redirect(download_url) # Close the database handle here for this process before we send the long download. database.close_db_filter(None) - logger.debug('Sending cached derived %s image %s', verb, derived.uuid) - return send_file(storage.stream_read_file(derived.locations, derived_layer_path)) + logger.debug('Sending cached derived %s image %s', verb, derived_image.ref) + return send_file(storage.stream_read_file(derived_image.blob.locations, derived_layer_path)) + logger.debug('Building and returning derived %s image %s', verb, derived_image.ref) - logger.debug('Building and returning derived %s image %s', verb, derived.uuid) - - # Load the image's JSON layer. - if not image_json: - image_json = json.loads(repo_image.v1_json_metadata) - - # Calculate a synthetic image ID. - synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest() + # Calculate a derived image ID. + derived_image_id = hashlib.sha256(repo_image.image_id + ':' + verb).hexdigest() def _cleanup(): # Close any existing DB connection once the process has exited. @@ -296,16 +250,14 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= def _store_metadata_and_cleanup(): with database.UseThenDisconnect(app.config): - model.storage.save_torrent_info(derived, app.config['BITTORRENT_PIECE_SIZE'], - hasher.final_piece_hashes()) - derived.image_size = hasher.hashed_bytes - derived.save() + model.set_torrent_info(derived_image.blob, app.config['BITTORRENT_PIECE_SIZE'], + hasher.final_piece_hashes()) + model.set_blob_size(derived_image.blob, hasher.hashed_bytes) # Create a queue process to generate the data. The queue files will read from the process # and send the results to the client and storage. 
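  # (Each queue file is an independent reader over the one generated stream, so the
  # client response, the storage write and, when signing is enabled, the signer can
  # each consume the same bytes without the image being rebuilt.)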
  handlers = [hasher.update]
-  args = (formatter, namespace, repository, tag, synthetic_image_id, image_json, repo_image,
-          handlers)
+  args = (formatter, namespace, repository, tag, derived_image_id, repo_image, handlers)
  queue_process = QueueProcess(_open_stream,
                               8 * 1024, 10 * 1024 * 1024, # 8K/10M chunk/max
                               args, finished=_store_metadata_and_cleanup)
@@ -322,12 +274,12 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
  queue_process.run()

  # Start the storage saving.
-  storage_args = (verb, derived.uuid, derived.locations, storage_queue_file)
-  QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args, finished=_cleanup)
+  storage_args = (verb, derived_image, storage_queue_file)
+  QueueProcess.run_process(_write_derived_image_to_storage, storage_args, finished=_cleanup)

  if sign and signer.name:
-    signing_args = (verb, derived.uuid, signing_queue_file)
-    QueueProcess.run_process(_sign_synthetic_image, signing_args, finished=_cleanup)
+    signing_args = (verb, derived_image, signing_queue_file)
+    QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup)

  # Close the database handle here for this process before we send the long download.
  database.close_db_filter(None)
@@ -337,7 +289,9 @@

def os_arch_checker(os, arch):
-  def checker(image_json):
+  def checker(repo_image):
+    image_json = repo_image.compat_metadata
+
    # Verify the architecture and os.
    operating_system = image_json.get('os', 'linux')
    if operating_system != os:
@@ -391,7 +345,7 @@ def get_squashed_tag(namespace, repository, tag):
@parse_repository_name()
def get_tag_torrent(namespace_name, repo_name, digest):
  permission = ReadRepositoryPermission(namespace_name, repo_name)
-  public_repo = model.repository.repository_is_public(namespace_name, repo_name)
+  public_repo = model.repository_is_public(namespace_name, repo_name)
  if not permission.can() and not public_repo:
    abort(403)

@@ -400,10 +354,9 @@ def get_tag_torrent(namespace_name, repo_name, digest):
    # We can not generate a private torrent cluster without a user uuid (e.g. token auth)
    abort(403)

-  try:
-    blob = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest)
-  except model.BlobDoesNotExist:
+  blob = model.get_repo_blob_by_digest(namespace_name, repo_name, digest)
+  if blob is None:
    abort(404)

  metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'torrent'])
-  return _torrent_for_storage(blob, public_repo)
+  return _torrent_for_blob(blob, public_repo)

diff --git a/image/appc/__init__.py b/image/appc/__init__.py
index e26f0d3e6..f3a958636 100644
--- a/image/appc/__init__.py
+++ b/image/appc/__init__.py
@@ -17,10 +17,10 @@ class AppCImageFormatter(TarImageFormatter):
    Image formatter which produces a tarball according to the AppC specification.
  """

-  def stream_generator(self, namespace, repository, tag, synthetic_image_id,
-                       layer_json, get_image_iterator, get_layer_iterator, get_image_json):
+  def stream_generator(self, namespace, repository, tag, repo_image,
+                       synthetic_image_id, get_image_iterator, get_layer_iterator):
    image_mtime = 0
-    created = next(get_image_iterator()).created
+    created = next(get_image_iterator()).v1_metadata.created
    if created is not None:
      image_mtime = calendar.timegm(created.utctimetuple())

@@ -29,7 +29,7 @@ class AppCImageFormatter(TarImageFormatter):
    # rootfs - The root file system

    # Yield the manifest.
-    manifest = self._build_manifest(namespace, repository, tag, layer_json, synthetic_image_id)
+    manifest = self._build_manifest(namespace, repository, tag, repo_image, synthetic_image_id)
    yield self.tar_file('manifest', manifest, mtime=image_mtime)

    # Yield the merged layer data.
@@ -168,9 +168,9 @@ class AppCImageFormatter(TarImageFormatter):
    return volumes

  @staticmethod
-  def _build_manifest(namespace, repository, tag, docker_layer_data, synthetic_image_id):
-    """ Builds an ACI manifest from the docker layer data. """
-
+  def _build_manifest(namespace, repository, tag, repo_image, synthetic_image_id):
+    """ Builds an ACI manifest of an existing repository image. """
+    docker_layer_data = repo_image.compat_metadata
    config = docker_layer_data.get('config', {})

    source_url = "%s://%s/%s/%s:%s" % (app.config['PREFERRED_URL_SCHEME'],

diff --git a/image/common.py b/image/common.py
index 28b628abf..733c51afc 100644
--- a/image/common.py
+++ b/image/common.py
@@ -7,19 +7,18 @@ class TarImageFormatter(object):
    Base class for classes which produce a tar containing image and layer data.
  """

-  def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
-                   get_image_iterator, get_layer_iterator, get_image_json):
+  def build_stream(self, namespace, repository, tag, repo_image, synthetic_image_id,
+                   get_image_iterator, get_layer_iterator):
    """ Builds and streams a synthetic .tar.gz that represents the formatted tar created by this
        class's implementation.
    """
-    return GzipWrap(self.stream_generator(namespace, repository, tag,
-                                          synthetic_image_id, layer_json,
-                                          get_image_iterator, get_layer_iterator,
-                                          get_image_json))
+    return GzipWrap(self.stream_generator(namespace, repository, tag, repo_image,
+                                          synthetic_image_id, get_image_iterator,
+                                          get_layer_iterator))

-  def stream_generator(self, namespace, repository, tag, synthetic_image_id,
-                       layer_json, get_image_iterator, get_layer_iterator, get_image_json):
+  def stream_generator(self, namespace, repository, tag, repo_image, synthetic_image_id,
+                       get_image_iterator, get_layer_iterator):
    raise NotImplementedError

  def tar_file(self, name, contents, mtime=None):

diff --git a/image/docker/schema1.py b/image/docker/schema1.py
index 23c49d61b..6e54ef3f4 100644
--- a/image/docker/schema1.py
+++ b/image/docker/schema1.py
@@ -88,7 +88,11 @@ class DockerSchema1Manifest(object):
    self._layers = None
    self._bytes = manifest_bytes

-    self._parsed = json.loads(manifest_bytes)
+    try:
+      self._parsed = json.loads(manifest_bytes)
+    except ValueError as ve:
+      raise MalformedSchema1Manifest('malformed manifest data: %s' % ve)
+
    self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY]
    self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY]

diff --git a/image/docker/squashed.py b/image/docker/squashed.py
index bf209eb1e..b0bc10530 100644
--- a/image/docker/squashed.py
+++ b/image/docker/squashed.py
@@ -28,10 +28,10 @@ class SquashedDockerImageFormatter(TarImageFormatter):
  # daemon dies when trying to load the entire tar into memory.
SIZE_MULTIPLIER = 1.2 - def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator, get_image_json): + def stream_generator(self, namespace, repository, tag, repo_image, synthetic_image_id, + get_image_iterator, get_layer_iterator): image_mtime = 0 - created = next(get_image_iterator()).created + created = next(get_image_iterator()).v1_metadata.created if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) @@ -58,7 +58,7 @@ class SquashedDockerImageFormatter(TarImageFormatter): yield self.tar_folder(synthetic_image_id, mtime=image_mtime) # Yield the JSON layer data. - layer_json = SquashedDockerImageFormatter._build_layer_json(layer_json, synthetic_image_id) + layer_json = SquashedDockerImageFormatter._build_layer_json(repo_image, synthetic_image_id) yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime) # Yield the VERSION file. @@ -70,10 +70,10 @@ class SquashedDockerImageFormatter(TarImageFormatter): # In V1 we have the actual uncompressed size, which is needed for back compat with # older versions of Docker. # In V2, we use the size given in the image JSON. - if image.storage.uncompressed_size: - estimated_file_size += image.storage.uncompressed_size + if image.blob.uncompressed_size: + estimated_file_size += image.blob.uncompressed_size else: - image_json = get_image_json(image) + image_json = image.compat_metadata estimated_file_size += image_json.get('Size', 0) * SquashedDockerImageFormatter.SIZE_MULTIPLIER # Make sure the estimated file size is an integer number of bytes. @@ -112,7 +112,8 @@ class SquashedDockerImageFormatter(TarImageFormatter): @staticmethod - def _build_layer_json(layer_json, synthetic_image_id): + def _build_layer_json(repo_image, synthetic_image_id): + layer_json = repo_image.compat_metadata updated_json = copy.deepcopy(layer_json) updated_json['id'] = synthetic_image_id diff --git a/test/registry_tests.py b/test/registry_tests.py index d59a1637c..a5c42fb56 100644 --- a/test/registry_tests.py +++ b/test/registry_tests.py @@ -1862,7 +1862,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC self.do_push('devtable', 'newrepo', 'devtable', 'password', images=initial_images) initial_image_id = '91081df45b58dc62dd207441785eef2b895f0383fbe601c99a3cf643c79957dc' - # Try to pull the torrent of the squashed image. This should fail with a 404 since the + # Try to pull the torrent of the squashed image. This should fail with a 406 since the # squashed image doesn't yet exist. self.conduct('GET', '/c1/squash/devtable/newrepo/latest', auth=('devtable', 'password'), headers=dict(accept='application/x-bittorrent'), diff --git a/test/test_manifests.py b/test/test_manifests.py index 03f2ff539..262aa810a 100644 --- a/test/test_manifests.py +++ b/test/test_manifests.py @@ -1,11 +1,11 @@ import unittest -import time import hashlib from app import app, storage, docker_v2_signing_key from initdb import setup_database_for_testing, finished_database_for_testing from data import model, database -from endpoints.v2.manifest import _write_manifest_itself, SignedManifestBuilder +from endpoints.v2.manifest import _write_manifest +from image.docker.schema1 import DockerSchema1ManifestBuilder ADMIN_ACCESS_USER = 'devtable' @@ -69,11 +69,11 @@ class TestManifests(unittest.TestCase): model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, first_blob_sha, location, 0, 0, 0) # Push the first manifest. 
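    # (DockerSchema1ManifestBuilder is assumed to JWS-sign the schema 1 manifest in
    # build(); the key used here is the registry's own docker_v2_signing_key.)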
- first_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG) + first_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG) .add_layer(first_blob_sha, '{"id": "first"}') .build(docker_v2_signing_key)) - _write_manifest_itself(ADMIN_ACCESS_USER, REPO, first_manifest) + _write_manifest(ADMIN_ACCESS_USER, REPO, first_manifest) # Delete all temp tags and perform GC. self._perform_cleanup() @@ -91,12 +91,12 @@ class TestManifests(unittest.TestCase): model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, third_blob_sha, location, 0, 0, 0) # Push the second manifest. - second_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG) + second_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG) .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}') .add_layer(second_blob_sha, '{"id": "first"}') .build(docker_v2_signing_key)) - _write_manifest_itself(ADMIN_ACCESS_USER, REPO, second_manifest) + _write_manifest(ADMIN_ACCESS_USER, REPO, second_manifest) # Delete all temp tags and perform GC. self._perform_cleanup() @@ -120,12 +120,12 @@ class TestManifests(unittest.TestCase): model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, fourth_blob_sha, location, 0, 0, 0) # Push the third manifest. - third_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG) + third_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG) .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}') .add_layer(fourth_blob_sha, '{"id": "first"}') # Note the change in BLOB from the second manifest. .build(docker_v2_signing_key)) - _write_manifest_itself(ADMIN_ACCESS_USER, REPO, third_manifest) + _write_manifest(ADMIN_ACCESS_USER, REPO, third_manifest) # Delete all temp tags and perform GC. 
self._perform_cleanup() diff --git a/util/secscan/analyzer.py b/util/secscan/analyzer.py index d178bbff9..3b2fa39fa 100644 --- a/util/secscan/analyzer.py +++ b/util/secscan/analyzer.py @@ -10,6 +10,7 @@ from data.database import Image, ExternalNotificationEvent from data.model.tag import filter_tags_have_repository_event, get_tags_for_image from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base from util.secscan.api import APIRequestFailure +from util.morecollections import AttrDict logger = logging.getLogger(__name__) @@ -132,6 +133,13 @@ class LayerAnalyzer(object): }, } - spawn_notification(tags[0].repository, 'vulnerability_found', event_data) + # TODO(jzelinskie): remove when more endpoints have been converted to using + # interfaces + repository = AttrDict({ + 'namespace_name': tags[0].repository.namespace_user.username, + 'name': tags[0].repository.name, + }) + + spawn_notification(repository, 'vulnerability_found', event_data) return True, set_status diff --git a/util/secscan/notifier.py b/util/secscan/notifier.py index e3e3ce9c4..908e5668a 100644 --- a/util/secscan/notifier.py +++ b/util/secscan/notifier.py @@ -10,6 +10,7 @@ from data.database import (Image, ImageStorage, ExternalNotificationEvent, Repos from endpoints.notificationhelper import spawn_notification from util.secscan import PRIORITY_LEVELS from util.secscan.api import APIRequestFailure +from util.morecollections import AttrDict logger = logging.getLogger(__name__) @@ -101,7 +102,12 @@ def process_notification_data(notification_data): }, } - spawn_notification(repository_map[repository_id], 'vulnerability_found', event_data) + # TODO(jzelinskie): remove when more endpoints have been converted to using interfaces + repository = AttrDict({ + 'namespace_name': repository_map[repository_id].namespace_user.username, + 'name': repository_map[repository_id].name, + }) + spawn_notification(repository, 'vulnerability_found', event_data) return True From ca883e56622b478ecbaab9c39a00314992774bad Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Tue, 20 Sep 2016 22:09:25 -0400 Subject: [PATCH 32/34] port label support to refactored v2 registry --- data/database.py | 17 ++++++++++++++++- data/interfaces/v2.py | 34 +++++++++++++++++++++++++++++++--- data/model/label.py | 3 ++- endpoints/v2/manifest.py | 18 ++++++++++++++---- image/docker/schema1.py | 5 +++-- 5 files changed, 66 insertions(+), 11 deletions(-) diff --git a/data/database.py b/data/database.py index d94c42b6f..f3c5912b8 100644 --- a/data/database.py +++ b/data/database.py @@ -957,7 +957,7 @@ class ServiceKey(BaseModel): rotation_duration = IntegerField(null=True) approval = ForeignKeyField(ServiceKeyApproval, null=True) -''' + class MediaType(BaseModel): """ MediaType is an enumeration of the possible formats of various objects in the data model. """ name = CharField(index=True, unique=True) @@ -992,6 +992,21 @@ class TagManifestLabel(BaseModel): (('annotated', 'label'), True), ) + +''' + +class ManifestLabel(BaseModel): + repository = ForeignKeyField(Repository, index=True) + annotated = ForeignKeyField(Manifest, index=True) + label = ForeignKeyField(Label) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('repository', 'annotated', 'label'), True), + ) + class Blob(BaseModel): """ Blob represents a content-addressable object stored outside of the database. 
""" digest = CharField(index=True, unique=True) diff --git a/data/interfaces/v2.py b/data/interfaces/v2.py index 891fe08fd..dce6baa28 100644 --- a/data/interfaces/v2.py +++ b/data/interfaces/v2.py @@ -4,7 +4,7 @@ from namedlist import namedlist from peewee import IntegrityError from data import model, database -from data.model import DataModelException +from data.model import DataModelException, TagAlreadyCreatedException from image.docker.v1 import DockerV1Metadata _MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws" @@ -48,6 +48,11 @@ class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'name RepositoryReference represents a reference to a Repository, without its full metadata. """ +class Label(namedtuple('Label', ['key', 'value', 'source_type', 'media_type'])): + """ + Label represents a key-value pair that describes a particular Manifest. + """ + class DockerRegistryV2DataInterface(object): """ @@ -158,6 +163,8 @@ class DockerRegistryV2DataInterface(object): """ Saves a manifest pointing to the given leaf image, with the given manifest, under the matching repository as a tag with the given name. + + Returns a boolean whether or not the tag was newly created or not. """ raise NotImplementedError() @@ -246,6 +253,14 @@ class DockerRegistryV2DataInterface(object): """ raise NotImplementedError() + @classmethod + def create_manifest_labels(cls, namespace_name, repo_name, manifest_digest, labels): + """ + Creates a new labels for the provided manifest. + """ + raise NotImplementedError() + + @classmethod def get_blob_path(cls, blob): """ @@ -407,8 +422,10 @@ class PreOCIModel(DockerRegistryV2DataInterface): @classmethod def save_manifest(cls, namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest, manifest_bytes): - model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_docker_id, - manifest_digest, manifest_bytes) + (_, newly_created) = model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, + leaf_layer_docker_id, manifest_digest, + manifest_bytes) + return newly_created @classmethod def repository_tags(cls, namespace_name, repo_name, limit, offset): @@ -540,6 +557,17 @@ class PreOCIModel(DockerRegistryV2DataInterface): blob_record = model.storage.get_storage_by_uuid(blob.uuid) model.storage.save_torrent_info(blob_record, piece_size, piece_bytes) + @classmethod + def create_manifest_labels(cls, namespace_name, repo_name, manifest_digest, labels): + if not labels: + # No point in doing anything more. 
+ return + + tag_manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_digest) + for label in labels: + model.label.create_manifest_label(tag_manifest, label.key, label.value, label.source_type, + label.media_type) + @classmethod def get_blob_path(cls, blob): blob_record = model.storage.get_storage_by_uuid(blob.uuid) diff --git a/data/model/label.py b/data/model/label.py index ad5eadc7d..bd783e168 100644 --- a/data/model/label.py +++ b/data/model/label.py @@ -1,11 +1,12 @@ import logging +from cachetools import lru_cache + from data.database import Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException from data.model._basequery import prefix_search from util.validation import validate_label_key from util.validation import is_json -from cachetools import lru_cache logger = logging.getLogger(__name__) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 4acc6abc2..2cc716103 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -8,7 +8,7 @@ import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth -from data.interfaces.v2 import PreOCIModel as model +from data.interfaces.v2 import PreOCIModel as model, Label from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect @@ -20,8 +20,9 @@ from endpoints.notificationhelper import spawn_notification from image.docker import ManifestException from image.docker.schema1 import DockerSchema1Manifest, DockerSchema1ManifestBuilder from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES -from util.registry.replication import queue_storage_replication from util.names import VALID_TAG_PATTERN +from util.registry.replication import queue_storage_replication +from util.validation import is_json logger = logging.getLogger(__name__) @@ -175,8 +176,14 @@ def _write_manifest(namespace_name, repo_name, manifest): # Store the manifest pointing to the tag. 
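  # (save_manifest now reports whether the tag manifest was newly created, so the
  # label extraction below runs only on the first write of a given manifest.)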
  leaf_layer_id = rewritten_images[-1].image_id
-  model.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id, manifest.digest,
-                      manifest.bytes)
+  newly_created = model.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id,
+                                      manifest.digest, manifest.bytes)
+  if newly_created:
+    labels = []
+    for key, value in manifest.layers[-1].v1_metadata.labels.iteritems():
+      media_type = 'application/json' if is_json(value) else 'text/plain'
+      labels.append(Label(key=key, value=value, source_type='manifest', media_type=media_type))
+    model.create_manifest_labels(namespace_name, repo_name, manifest.digest, labels)

  return repo, storage_map

@@ -257,3 +264,6 @@ def _generate_and_store_manifest(namespace_name, repo_name, tag_name):
  model.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest.digest,
                                       manifest.bytes)
  return manifest
+
+def _determine_media_type(value):
+  return 'application/json' if is_json(value) else 'text/plain'

diff --git a/image/docker/schema1.py b/image/docker/schema1.py
index 6e54ef3f4..72b1aa8d2 100644
--- a/image/docker/schema1.py
+++ b/image/docker/schema1.py
@@ -76,7 +76,7 @@ class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_

class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created',
-                                                         'comment', 'command'])):
+                                                         'comment', 'command', 'labels'])):
  """
  Represents the necessary data extracted from the v1 compatibility string in a given layer of a
  Manifest.
@@ -207,9 +207,10 @@ class DockerSchema1Manifest(object):
      if not 'id' in v1_metadata:
        raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')

+      labels = v1_metadata.get('config', {}).get('Labels', {}) or {}
      extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
                                    v1_metadata.get('created'), v1_metadata.get('comment'),
-                                    command)
+                                    command, labels)
      yield Schema1Layer(image_digest, extracted, metadata_string)

  @property

From a1a930b83308d942bd1b62df0379ff47b64bd4a0 Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Fri, 23 Sep 2016 14:28:59 -0400
Subject: [PATCH 33/34] database: fix indices post-rebase

---
 data/database.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/data/database.py b/data/database.py
index f3c5912b8..d93bba30f 100644
--- a/data/database.py
+++ b/data/database.py
@@ -1007,6 +1007,7 @@ class ManifestLabel(BaseModel):
      (('repository', 'annotated', 'label'), True),
    )

+
class Blob(BaseModel):
  """ Blob represents a content-addressable object stored outside of the database.
  """
  digest = CharField(index=True, unique=True)
@@ -1169,6 +1170,7 @@ class Tag(BaseModel):
    read_slaves = (read_slave,)
    indexes = (
      (('repository', 'name'), False),
+      (('repository', 'name', 'hidden'), False),

      # This unique index prevents deadlocks when concurrently moving and deleting tags
      (('repository', 'name', 'lifetime_end'), True),
@@ -1185,7 +1187,7 @@ class BitTorrentPieces(BaseModel):
    database = db
    read_slaves = (read_slave,)
    indexes = (
-      (('annotated', 'label'), True),
+      (('blob', 'piece_length'), True),
    )


From 44eca10c0506133f62a73d089d2f307cbdc89822 Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Fri, 23 Sep 2016 17:50:09 -0400
Subject: [PATCH 34/34] update interfaces to use ABC

---
 data/interfaces/key_server.py   |  96 +++++-----
 data/interfaces/v1.py           | 294 ++++++++++++++---------------
 data/interfaces/v2.py           | 321 +++++++++++++++-----------------
 data/interfaces/verbs.py        | 244 ++++++++++++------------
 endpoints/keyserver/__init__.py |   2 +-
 endpoints/v1/index.py           |   2 +-
 endpoints/v1/registry.py        |   2 +-
 endpoints/v1/tag.py             |   2 +-
 endpoints/v2/blob.py            |   2 +-
 endpoints/v2/catalog.py         |   2 +-
 endpoints/v2/manifest.py        |   2 +-
 endpoints/v2/tag.py             |   2 +-
 endpoints/v2/v2auth.py          |   2 +-
 endpoints/verbs/__init__.py     |   2 +-
 14 files changed, 467 insertions(+), 508 deletions(-)

diff --git a/data/interfaces/key_server.py b/data/interfaces/key_server.py
index 81cf43312..b9b6d324b 100644
--- a/data/interfaces/key_server.py
+++ b/data/interfaces/key_server.py
@@ -1,7 +1,9 @@
+from abc import ABCMeta, abstractmethod
from collections import namedtuple

-import data.model
+from six import add_metaclass

+import data.model

class ServiceKey(namedtuple('ServiceKey', ['name', 'kid', 'service', 'jwk', 'metadata',
@@ -22,47 +24,48 @@ class ServiceKeyDoesNotExist(ServiceKeyException):
  pass


-# TODO(jzelinskie): make this interface support superuser API
+# TODO(jzelinskie): maybe make this interface support superuser API
+@add_metaclass(ABCMeta)
class KeyServerDataInterface(object):
  """
  Interface that represents all data store interactions required by a JWT key service.
  """

-  @classmethod
-  def list_service_keys(cls, service):
+  @abstractmethod
+  def list_service_keys(self, service):
    """
    Returns a list of service keys or an empty list if the service does not exist.
    """
-    raise NotImplementedError()
+    pass

-  @classmethod
-  def get_service_key(cls, signer_kid, service=None, alive_only=None, approved_only=None):
+  @abstractmethod
+  def get_service_key(self, signer_kid, service=None, alive_only=None, approved_only=None):
    """
    Returns a service key with the given kid or raises ServiceKeyNotFound.
    """
-    raise NotImplementedError()
+    pass

-  @classmethod
-  def create_service_key(cls, name, kid, service, jwk, metadata, expiration_date,
+  @abstractmethod
+  def create_service_key(self, name, kid, service, jwk, metadata, expiration_date,
                         rotation_duration=None):
    """
    Stores a service key.
    """
-    raise NotImplementedError()
+    pass

-  @classmethod
-  def replace_service_key(cls, old_kid, kid, jwk, metadata, expiration_date):
+  @abstractmethod
+  def replace_service_key(self, old_kid, kid, jwk, metadata, expiration_date):
    """
    Replaces a service key with a new key or raises ServiceKeyNotFound.
    """
-    raise NotImplementedError()
+    pass

-  @classmethod
-  def delete_service_key(cls, kid):
+  @abstractmethod
+  def delete_service_key(self, kid):
    """
    Deletes and returns a service key with the given kid or raises ServiceKeyNotFound.
""" - raise NotImplementedError() + pass class PreOCIModel(KeyServerDataInterface): @@ -70,53 +73,52 @@ class PreOCIModel(KeyServerDataInterface): PreOCIModel implements the data model for JWT key service using a database schema before it was changed to support the OCI specification. """ - @classmethod - def _db_key_to_servicekey(cls, key): - """ - Converts the database model of a service key into a ServiceKey. - """ - return ServiceKey( - name=key.name, - kid=key.kid, - service=key.service, - jwk=key.jwk, - metadata=key.metadata, - created_date=key.created_date, - expiration_date=key.expiration_date, - rotation_duration=key.rotation_duration, - approval=key.approval, - ) - - @classmethod - def list_service_keys(cls, service): + def list_service_keys(self, service): return data.model.service_keys.list_service_keys(service) - @classmethod - def get_service_key(cls, signer_kid, service=None, alive_only=True, approved_only=True): + def get_service_key(self, signer_kid, service=None, alive_only=True, approved_only=True): try: key = data.model.service_keys.get_service_key(signer_kid, service, alive_only, approved_only) - return cls._db_key_to_servicekey(key) + return _db_key_to_servicekey(key) except data.model.ServiceKeyDoesNotExist: raise ServiceKeyDoesNotExist() - @classmethod - def create_service_key(cls, name, kid, service, jwk, metadata, expiration_date, + def create_service_key(self, name, kid, service, jwk, metadata, expiration_date, rotation_duration=None): key = data.model.service_keys.create_service_key(name, kid, service, jwk, metadata, expiration_date, rotation_duration) - return cls._db_key_to_servicekey(key) + return _db_key_to_servicekey(key) - @classmethod - def replace_service_key(cls, old_kid, kid, jwk, metadata, expiration_date): + def replace_service_key(self, old_kid, kid, jwk, metadata, expiration_date): try: data.model.service_keys.replace_service_key(old_kid, kid, jwk, metadata, expiration_date) except data.model.ServiceKeyDoesNotExist: raise ServiceKeyDoesNotExist() - @classmethod - def delete_service_key(cls, kid): + def delete_service_key(self, kid): try: key = data.model.service_keys.delete_service_key(kid) - return cls._db_key_to_servicekey(key) + return _db_key_to_servicekey(key) except data.model.ServiceKeyDoesNotExist: raise ServiceKeyDoesNotExist() + + +pre_oci_model = PreOCIModel() + + +def _db_key_to_servicekey(key): + """ + Converts the Pre-OCI database model of a service key into a ServiceKey. + """ + return ServiceKey( + name=key.name, + kid=key.kid, + service=key.service, + jwk=key.jwk, + metadata=key.metadata, + created_date=key.created_date, + expiration_date=key.expiration_date, + rotation_duration=key.rotation_duration, + approval=key.approval, + ) + diff --git a/data/interfaces/v1.py b/data/interfaces/v1.py index a9e29dc89..8aeb2a008 100644 --- a/data/interfaces/v1.py +++ b/data/interfaces/v1.py @@ -1,7 +1,11 @@ +from abc import ABCMeta, abstractmethod +from collections import namedtuple + +from six import add_metaclass + from app import app, storage as store from data import model from data.model import db_transaction -from collections import namedtuple from util.morecollections import AttrDict @@ -12,211 +16,216 @@ class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'desc """ +@add_metaclass(ABCMeta) class DockerRegistryV1DataInterface(object): """ Interface that represents all data store interactions required by a Docker Registry v1. 
""" - @classmethod - def placement_locations_docker_v1(cls, namespace_name, repo_name, image_id): + @abstractmethod + def placement_locations_docker_v1(self, namespace_name, repo_name, image_id): """ Returns all the placements for the image with the given V1 Docker ID, found under the given repository or None if no image was found. """ - raise NotImplementedError() + pass - @classmethod - def placement_locations_and_path_docker_v1(cls, namespace_name, repo_name, image_id): + @abstractmethod + def placement_locations_and_path_docker_v1(self, namespace_name, repo_name, image_id): """ Returns all the placements for the image with the given V1 Docker ID, found under the given repository or None if no image was found. """ - raise NotImplementedError() + pass - @classmethod - def docker_v1_metadata(cls, namespace_name, repo_name, image_id): + @abstractmethod + def docker_v1_metadata(self, namespace_name, repo_name, image_id): """ Returns various pieces of metadata associated with an image with the given V1 Docker ID, including the checksum and its V1 JSON metadata. """ - raise NotImplementedError() + pass - @classmethod - def update_docker_v1_metadata(cls, namespace_name, repo_name, image_id, created_date_str, comment, - command, compat_json, parent_image_id=None): + @abstractmethod + def update_docker_v1_metadata(self, namespace_name, repo_name, image_id, created_date_str, + comment, command, compat_json, parent_image_id=None): """ Updates various pieces of V1 metadata associated with a particular image. """ - raise NotImplementedError() + pass - @classmethod - def storage_exists(cls, namespace_name, repo_name, image_id): + @abstractmethod + def storage_exists(self, namespace_name, repo_name, image_id): """ Returns whether storage already exists for the image with the V1 Docker ID under the given repository. """ - raise NotImplementedError() + pass - @classmethod - def store_docker_v1_checksums(cls, namespace_name, repo_name, image_id, checksum, content_checksum): + @abstractmethod + def store_docker_v1_checksums(self, namespace_name, repo_name, image_id, checksum, + content_checksum): """ Stores the various V1 checksums for the image with the V1 Docker ID. """ - raise NotImplementedError() + pass - @classmethod - def is_image_uploading(cls, namespace_name, repo_name, image_id): + @abstractmethod + def is_image_uploading(self, namespace_name, repo_name, image_id): """ Returns whether the image with the V1 Docker ID is currently marked as uploading. """ - raise NotImplementedError() + pass - @classmethod - def update_image_uploading(cls, namespace_name, repo_name, image_id, is_uploading): - """ Marks the image with the V1 Docker ID with the given uploading status. """ - raise NotImplementedError() + @abstractmethod + def update_image_uploading(self, namespace_name, repo_name, image_id, is_uploading): + """ + Marks the image with the V1 Docker ID with the given uploading status. + """ + pass - @classmethod - def update_image_sizes(cls, namespace_name, repo_name, image_id, size, uncompressed_size): + @abstractmethod + def update_image_sizes(self, namespace_name, repo_name, image_id, size, uncompressed_size): """ Updates the sizing information for the image with the given V1 Docker ID. """ - raise NotImplementedError() + pass - @classmethod - def get_image_size(cls, namespace_name, repo_name, image_id): + @abstractmethod + def get_image_size(self, namespace_name, repo_name, image_id): """ Returns the wire size of the image with the given Docker V1 ID. 
""" - raise NotImplementedError() + pass - @classmethod - def create_bittorrent_pieces(cls, namespace_name, repo_name, image_id, pieces_bytes): + @abstractmethod + def create_bittorrent_pieces(self, namespace_name, repo_name, image_id, pieces_bytes): """ Saves the BitTorrent piece hashes for the image with the given Docker V1 ID. """ - raise NotImplementedError() + pass - @classmethod - def image_ancestry(cls, namespace_name, repo_name, image_id): + @abstractmethod + def image_ancestry(self, namespace_name, repo_name, image_id): """ Returns a list containing the full ancestry of Docker V1 IDs, in order, for the image with the given Docker V1 ID. """ - raise NotImplementedError() + pass - @classmethod - def repository_exists(cls, namespace_name, repo_name): + @abstractmethod + def repository_exists(self, namespace_name, repo_name): """ Returns whether the repository with the given name and namespace exists. """ - raise NotImplementedError() + pass - @classmethod - def create_or_link_image(cls, username, namespace_name, repo_name, image_id, storage_location): + @abstractmethod + def create_or_link_image(self, username, namespace_name, repo_name, image_id, storage_location): """ Adds the given image to the given repository, by either linking to an existing image visible to the user with the given username, or creating a new one if no existing image matches. """ - raise NotImplementedError() + pass - @classmethod - def create_temp_hidden_tag(cls, namespace_name, repo_name, image_id, expiration): + @abstractmethod + def create_temp_hidden_tag(self, namespace_name, repo_name, image_id, expiration): """ Creates a hidden tag under the matching namespace pointing to the image with the given V1 Docker ID. """ - raise NotImplementedError() + pass - @classmethod - def list_tags(cls, namespace_name, repo_name): + @abstractmethod + def list_tags(self, namespace_name, repo_name): """ Returns all the tags defined in the repository with the given namespace and name. """ - raise NotImplementedError() + pass - @classmethod - def create_or_update_tag(cls, namespace_name, repo_name, image_id, tag_name): + @abstractmethod + def create_or_update_tag(self, namespace_name, repo_name, image_id, tag_name): """ Creates or updates a tag under the matching repository to point to the image with the given Docker V1 ID. """ - raise NotImplementedError() + pass - @classmethod - def find_image_id_by_tag(cls, namespace_name, repo_name, tag_name): + @abstractmethod + def find_image_id_by_tag(self, namespace_name, repo_name, tag_name): """ Returns the Docker V1 image ID for the HEAD image for the tag with the given name under the matching repository, or None if none. """ - raise NotImplementedError() + pass - @classmethod - def delete_tag(cls, namespace_name, repo_name, tag_name): - """ Deletes the given tag from the given repository. """ - raise NotImplementedError() + @abstractmethod + def delete_tag(self, namespace_name, repo_name, tag_name): + """ + Deletes the given tag from the given repository. + """ + pass - @classmethod - def load_token(cls, token): + @abstractmethod + def load_token(self, token): """ Loads the data associated with the given (deprecated) access token, and, if found returns True. """ - raise NotImplementedError() + pass - @classmethod - def verify_robot(cls, username, token): + @abstractmethod + def verify_robot(self, username, token): """ Returns True if the given robot username and token match an existing robot account. 
""" - raise NotImplementedError() + pass - @classmethod - def change_user_password(cls, user, new_password): + @abstractmethod + def change_user_password(self, user, new_password): """ Changes the password associated with the given user. """ - raise NotImplementedError() + pass - @classmethod - def get_repository(cls, namespace_name, repo_name): + @abstractmethod + def get_repository(self, namespace_name, repo_name): """ Returns the repository with the given name under the given namespace or None if none. """ - raise NotImplementedError() + pass - @classmethod - def create_repository(cls, namespace_name, repo_name, user=None): + @abstractmethod + def create_repository(self, namespace_name, repo_name, user=None): """ Creates a new repository under the given namespace with the given name, for the given user. """ - raise NotImplementedError() + pass - @classmethod - def repository_is_public(cls, namespace_name, repo_name): + @abstractmethod + def repository_is_public(self, namespace_name, repo_name): """ Returns whether the repository with the given name under the given namespace is public. If no matching repository was found, returns False. """ - raise NotImplementedError() + pass - @classmethod - def validate_oauth_token(cls, token): + @abstractmethod + def validate_oauth_token(self, token): """ Returns whether the given OAuth token validates. """ - raise NotImplementedError() + pass - @classmethod - def get_sorted_matching_repositories(cls, search_term, only_public, can_read, limit): + @abstractmethod + def get_sorted_matching_repositories(self, search_term, only_public, can_read, limit): """ Returns a sorted list of repositories matching the given search term. can_read is a callback that will be invoked for each repository found, to filter results to only those visible to the current user (if any). """ - raise NotImplementedError() - + pass class PreOCIModel(DockerRegistryV1DataInterface): @@ -224,22 +233,19 @@ class PreOCIModel(DockerRegistryV1DataInterface): PreOCIModel implements the data model for the v1 Docker Registry protocol using a database schema before it was changed to support the OCI specification. 
""" - @classmethod - def placement_locations_docker_v1(cls, namespace_name, repo_name, image_id): + def placement_locations_docker_v1(self, namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) if repo_image is None or repo_image.storage is None: return None return repo_image.storage.locations - @classmethod - def placement_locations_and_path_docker_v1(cls, namespace_name, repo_name, image_id): + def placement_locations_and_path_docker_v1(self, namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) if not repo_image or repo_image.storage is None: return None, None return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage) - @classmethod - def docker_v1_metadata(cls, namespace_name, repo_name, image_id): + def docker_v1_metadata(self, namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) if repo_image is None: return None @@ -252,9 +258,8 @@ class PreOCIModel(DockerRegistryV1DataInterface): 'compat_json': repo_image.v1_json_metadata, }) - @classmethod - def update_docker_v1_metadata(cls, namespace_name, repo_name, image_id, created_date_str, comment, - command, compat_json, parent_image_id=None): + def update_docker_v1_metadata(self, namespace_name, repo_name, image_id, created_date_str, + comment, command, compat_json, parent_image_id=None): parent_image = None if parent_image_id is not None: parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_image_id) @@ -262,8 +267,7 @@ class PreOCIModel(DockerRegistryV1DataInterface): model.image.set_image_metadata(image_id, namespace_name, repo_name, created_date_str, comment, command, compat_json, parent=parent_image) - @classmethod - def storage_exists(cls, namespace_name, repo_name, image_id): + def storage_exists(self, namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) if repo_image is None or repo_image.storage is None: return False @@ -274,8 +278,8 @@ class PreOCIModel(DockerRegistryV1DataInterface): layer_path = model.storage.get_layer_path(repo_image.storage) return store.exists(repo_image.storage.locations, layer_path) - @classmethod - def store_docker_v1_checksums(cls, namespace_name, repo_name, image_id, checksum, content_checksum): + def store_docker_v1_checksums(self, namespace_name, repo_name, image_id, checksum, + content_checksum): repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) if repo_image is None or repo_image.storage is None: return @@ -286,15 +290,13 @@ class PreOCIModel(DockerRegistryV1DataInterface): repo_image.storage.save() repo_image.save() - @classmethod - def is_image_uploading(cls, namespace_name, repo_name, image_id): + def is_image_uploading(self, namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) if repo_image is None or repo_image.storage is None: return False return repo_image.storage.uploading - @classmethod - def update_image_uploading(cls, namespace_name, repo_name, image_id, is_uploading): + def update_image_uploading(self, namespace_name, repo_name, image_id, is_uploading): repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) if repo_image is None or repo_image.storage is None: return @@ -303,20 +305,17 @@ class 
PreOCIModel(DockerRegistryV1DataInterface): repo_image.storage.save() return repo_image.storage - @classmethod - def update_image_sizes(cls, namespace_name, repo_name, image_id, size, uncompressed_size): + def update_image_sizes(self, namespace_name, repo_name, image_id, size, uncompressed_size): model.storage.set_image_storage_metadata(image_id, namespace_name, repo_name, size, uncompressed_size) - @classmethod - def get_image_size(cls, namespace_name, repo_name, image_id): + def get_image_size(self, namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) if repo_image is None or repo_image.storage is None: return None return repo_image.storage.image_size - @classmethod - def create_bittorrent_pieces(cls, namespace_name, repo_name, image_id, pieces_bytes): + def create_bittorrent_pieces(self, namespace_name, repo_name, image_id, pieces_bytes): repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) if repo_image is None or repo_image.storage is None: return @@ -324,8 +323,7 @@ class PreOCIModel(DockerRegistryV1DataInterface): model.storage.save_torrent_info(repo_image.storage, app.config['BITTORRENT_PIECE_SIZE'], pieces_bytes) - @classmethod - def image_ancestry(cls, namespace_name, repo_name, image_id): + def image_ancestry(self, namespace_name, repo_name, image_id): try: image = model.image.get_image_by_id(namespace_name, repo_name, image_id) except model.InvalidImageException: @@ -336,18 +334,15 @@ class PreOCIModel(DockerRegistryV1DataInterface): ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) return ancestry_docker_ids - @classmethod - def repository_exists(cls, namespace_name, repo_name): + def repository_exists(self, namespace_name, repo_name): repo = model.repository.get_repository(namespace_name, repo_name) return repo is not None - @classmethod - def create_or_link_image(cls, username, namespace_name, repo_name, image_id, storage_location): + def create_or_link_image(self, username, namespace_name, repo_name, image_id, storage_location): repo = model.repository.get_repository(namespace_name, repo_name) model.image.find_create_or_link_image(image_id, repo, username, {}, storage_location) - @classmethod - def create_temp_hidden_tag(cls, namespace_name, repo_name, image_id, expiration): + def create_temp_hidden_tag(self, namespace_name, repo_name, image_id, expiration): repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) if repo_image is None: return @@ -355,16 +350,13 @@ class PreOCIModel(DockerRegistryV1DataInterface): repo = repo_image.repository model.tag.create_temporary_hidden_tag(repo, repo_image, expiration) - @classmethod - def list_tags(cls, namespace_name, repo_name): + def list_tags(self, namespace_name, repo_name): return model.tag.list_repository_tags(namespace_name, repo_name) - @classmethod - def create_or_update_tag(cls, namespace_name, repo_name, image_id, tag_name): + def create_or_update_tag(self, namespace_name, repo_name, image_id, tag_name): model.tag.create_or_update_tag(namespace_name, repo_name, tag_name, image_id) - @classmethod - def find_image_id_by_tag(cls, namespace_name, repo_name, tag_name): + def find_image_id_by_tag(self, namespace_name, repo_name, tag_name): try: tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name) except model.DataModelException: @@ -372,61 +364,55 @@ class PreOCIModel(DockerRegistryV1DataInterface): return tag_image.docker_image_id - @classmethod - def 
delete_tag(cls, namespace_name, repo_name, tag_name): + def delete_tag(self, namespace_name, repo_name, tag_name): model.tag.delete_tag(namespace_name, repo_name, tag_name) - @classmethod - def load_token(cls, token): + def load_token(self, token): try: model.token.load_token_data(token) return True except model.InvalidTokenException: return False - @classmethod - def verify_robot(cls, username, token): + def verify_robot(self, username, token): try: return bool(model.user.verify_robot(username, token)) except model.InvalidRobotException: return False - @classmethod - def change_user_password(cls, user, new_password): + def change_user_password(self, user, new_password): model.user.change_password(user, new_password) - @classmethod - def _repository_for_repo(cls, repo): - """ Returns a Repository object representing the repo data model instance given. """ - return Repository( - id=repo.id, - name=repo.name, - namespace_name=repo.namespace_user.username, - description=repo.description, - is_public=model.repository.is_repository_public(repo) - ) - - @classmethod - def get_repository(cls, namespace_name, repo_name): + def get_repository(self, namespace_name, repo_name): repo = model.repository.get_repository(namespace_name, repo_name) if repo is None: return None - return cls._repository_for_repo(repo) + return _repository_for_repo(repo) - @classmethod - def create_repository(cls, namespace_name, repo_name, user=None): + def create_repository(self, namespace_name, repo_name, user=None): model.repository.create_repository(namespace_name, repo_name, user) - @classmethod - def repository_is_public(cls, namespace_name, repo_name): + def repository_is_public(self, namespace_name, repo_name): return model.repository.repository_is_public(namespace_name, repo_name) - @classmethod - def validate_oauth_token(cls, token): + def validate_oauth_token(self, token): return bool(model.oauth.validate_access_token(token)) - @classmethod - def get_sorted_matching_repositories(cls, search_term, only_public, can_read, limit): + def get_sorted_matching_repositories(self, search_term, only_public, can_read, limit): repos = model.repository.get_sorted_matching_repositories(search_term, only_public, can_read, limit=limit) - return [cls._repository_for_repo(repo) for repo in repos] + return [_repository_for_repo(repo) for repo in repos] + + +def _repository_for_repo(repo): + """ Returns a Repository object representing the Pre-OCI data model instance of a repository. 
""" + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) + + +pre_oci_model = PreOCIModel() diff --git a/data/interfaces/v2.py b/data/interfaces/v2.py index dce6baa28..cb16334d6 100644 --- a/data/interfaces/v2.py +++ b/data/interfaces/v2.py @@ -1,10 +1,12 @@ +from abc import ABCMeta, abstractmethod from collections import namedtuple from namedlist import namedlist from peewee import IntegrityError +from six import add_metaclass from data import model, database -from data.model import DataModelException, TagAlreadyCreatedException +from data.model import DataModelException from image.docker.v1 import DockerV1Metadata _MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws" @@ -54,153 +56,153 @@ class Label(namedtuple('Label', ['key', 'value', 'source_type', 'media_type'])): """ +@add_metaclass(ABCMeta) class DockerRegistryV2DataInterface(object): """ Interface that represents all data store interactions required by a Docker Registry v1. """ - @classmethod - def create_repository(cls, namespace_name, repo_name, creating_user=None): + @abstractmethod + def create_repository(self, namespace_name, repo_name, creating_user=None): """ Creates a new repository under the specified namespace with the given name. The user supplied is the user creating the repository, if any. """ - raise NotImplementedError() + pass - @classmethod - def repository_is_public(cls, namespace_name, repo_name): + @abstractmethod + def repository_is_public(self, namespace_name, repo_name): """ Returns true if the repository with the given name under the given namespace has public visibility. """ - raise NotImplementedError() + pass - @classmethod - def get_repository(cls, namespace_name, repo_name): + @abstractmethod + def get_repository(self, namespace_name, repo_name): """ Returns a repository tuple for the repository with the given name under the given namespace. Returns None if no such repository was found. """ - raise NotImplementedError() + pass - @classmethod - def has_active_tag(cls, namespace_name, repo_name, tag_name): + @abstractmethod + def has_active_tag(self, namespace_name, repo_name, tag_name): """ Returns whether there is an active tag for the tag with the given name under the matching repository, if any, or none if none. """ - raise NotImplementedError() + pass - @classmethod - def get_manifest_by_tag(cls, namespace_name, repo_name, tag_name): + @abstractmethod + def get_manifest_by_tag(self, namespace_name, repo_name, tag_name): """ Returns the current manifest for the tag with the given name under the matching repository, if any, or None if none. """ - raise NotImplementedError() + pass - @classmethod - def get_manifest_by_digest(cls, namespace_name, repo_name, digest): + @abstractmethod + def get_manifest_by_digest(self, namespace_name, repo_name, digest): """ Returns the manifest matching the given digest under the matching repository, if any, or None if none. """ - raise NotImplementedError() + pass - @classmethod - def delete_manifest_by_digest(cls, namespace_name, repo_name, digest): + @abstractmethod + def delete_manifest_by_digest(self, namespace_name, repo_name, digest): """ Deletes the manifest with the associated digest (if any) and returns all removed tags that pointed to that manifest. If the manifest was not found, returns an empty list. 
""" - raise NotImplementedError() + pass - @classmethod - def get_docker_v1_metadata_by_tag(cls, namespace_name, repo_name, tag_name): + @abstractmethod + def get_docker_v1_metadata_by_tag(self, namespace_name, repo_name, tag_name): """ Returns the Docker V1 metadata associated with the tag with the given name under the matching repository, if any. If none, returns None. """ - raise NotImplementedError() + pass - @classmethod - def get_docker_v1_metadata_by_image_id(cls, namespace_name, repo_name, docker_image_ids): + @abstractmethod + def get_docker_v1_metadata_by_image_id(self, namespace_name, repo_name, docker_image_ids): """ Returns a map of Docker V1 metadata for each given image ID, matched under the repository with the given namespace and name. Returns an empty map if the matching repository was not found. """ - raise NotImplementedError() + pass - @classmethod - def get_parents_docker_v1_metadata(cls, namespace_name, repo_name, docker_image_id): + @abstractmethod + def get_parents_docker_v1_metadata(self, namespace_name, repo_name, docker_image_id): """ Returns an ordered list containing the Docker V1 metadata for each parent of the image with the given docker ID under the matching repository. Returns an empty list if the image was not found. """ - raise NotImplementedError() + pass - @classmethod - def create_manifest_and_update_tag(cls, namespace_name, repo_name, tag_name, manifest_digest, + @abstractmethod + def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest_digest, manifest_bytes): """ Creates a new manifest with the given digest and byte data, and assigns the tag with the given name under the matching repository to it. """ - raise NotImplementedError() + pass - @classmethod - def synthesize_v1_image(cls, repository, storage, image_id, created, comment, command, + @abstractmethod + def synthesize_v1_image(self, repository, storage, image_id, created, comment, command, compat_json, parent_image_id): """ Synthesizes a V1 image under the specified repository, pointing to the given storage and returns the V1 metadata for the synthesized image. """ - raise NotImplementedError() + pass - @classmethod - def save_manifest(cls, namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest, - manifest_bytes): + @abstractmethod + def save_manifest(self, namespace_name, repo_name, tag_name, leaf_layer_docker_id, + manifest_digest, manifest_bytes): """ Saves a manifest pointing to the given leaf image, with the given manifest, under the matching repository as a tag with the given name. Returns a boolean whether or not the tag was newly created or not. """ - raise NotImplementedError() + pass - @classmethod - def repository_tags(cls, namespace_name, repo_name, limit, offset): + @abstractmethod + def repository_tags(self, namespace_name, repo_name, limit, offset): """ Returns the active tags under the repository with the given name and namespace. """ - raise NotImplementedError() + pass - @classmethod - def get_visible_repositories(cls, username, limit, offset): + @abstractmethod + def get_visible_repositories(self, username, limit, offset): """ Returns the repositories visible to the user with the given username, if any. 
""" - raise NotImplementedError() + pass - @classmethod - def create_blob_upload(cls, namespace_name, repo_name, upload_uuid, location_name, - storage_metadata): + @abstractmethod + def create_blob_upload(self, namespace_name, repo_name, upload_uuid, location_name, storage_metadata): """ Creates a blob upload under the matching repository with the given UUID and metadata. Returns whether the matching repository exists. """ - raise NotImplementedError() + pass - @classmethod - def blob_upload_by_uuid(cls, namespace_name, repo_name, upload_uuid): + @abstractmethod + def blob_upload_by_uuid(self, namespace_name, repo_name, upload_uuid): """ Searches for a blob upload with the given UUID under the given repository and returns it or None if none. """ - raise NotImplementedError() + pass - @classmethod - def update_blob_upload(cls, blob_upload): + @abstractmethod + def update_blob_upload(self, blob_upload): """ Saves any changes to the blob upload object given to the backing data store. Fields that can change: @@ -212,61 +214,53 @@ class DockerRegistryV2DataInterface(object): - chunk_count - sha_state """ - raise NotImplementedError() + pass - @classmethod - def delete_blob_upload(cls, namespace_name, repo_name, uuid): + @abstractmethod + def delete_blob_upload(self, namespace_name, repo_name, uuid): """ Deletes the blob upload with the given uuid under the matching repository. If none, does nothing. """ - raise NotImplementedError() + pass - @classmethod - def create_blob_and_temp_tag(cls, namespace_name, repo_name, blob_digest, blob_upload, + @abstractmethod + def create_blob_and_temp_tag(self, namespace_name, repo_name, blob_digest, blob_upload, expiration_sec): """ Creates a blob and links a temporary tag with the specified expiration to it under the matching repository. """ - raise NotImplementedError() + pass - @classmethod - def lookup_blobs_by_digest(cls, namespace_name, repo_name, digests): - """ - Returns all the blobs with matching digests found under the matching repository. If the - repository doesn't exist, returns {}. - """ - raise NotImplementedError() - - @classmethod - def get_blob_by_digest(cls, namespace_name, repo_name, digest): + @abstractmethod + def get_blob_by_digest(self, namespace_name, repo_name, digest): """ Returns the blob with the given digest under the matching repository or None if none. """ - raise NotImplementedError() + pass - @classmethod - def save_bittorrent_pieces(cls, blob, piece_size, piece_bytes): + @abstractmethod + def save_bittorrent_pieces(self, blob, piece_size, piece_bytes): """ Saves the BitTorrent piece hashes for the given blob. """ - raise NotImplementedError() + pass - @classmethod - def create_manifest_labels(cls, namespace_name, repo_name, manifest_digest, labels): + @abstractmethod + def create_manifest_labels(self, namespace_name, repo_name, manifest_digest, labels): """ Creates a new labels for the provided manifest. """ - raise NotImplementedError() + pass - @classmethod - def get_blob_path(cls, blob): + @abstractmethod + def get_blob_path(self, blob): """ Once everything is moved over, this could be in util.registry and not even touch the database. """ - raise NotImplementedError() + pass class PreOCIModel(DockerRegistryV2DataInterface): @@ -274,58 +268,40 @@ class PreOCIModel(DockerRegistryV2DataInterface): PreOCIModel implements the data model for the v2 Docker Registry protocol using a database schema before it was changed to support the OCI specification. 
""" - @classmethod - def create_repository(cls, namespace_name, repo_name, creating_user=None): + def create_repository(self, namespace_name, repo_name, creating_user=None): return model.repository.create_repository(namespace_name, repo_name, creating_user) - @classmethod - def repository_is_public(cls, namespace_name, repo_name): + def repository_is_public(self, namespace_name, repo_name): return model.repository.repository_is_public(namespace_name, repo_name) - @classmethod - def _repository_for_repo(cls, repo): - """ Returns a Repository object representing the repo data model instance given. """ - return Repository( - id=repo.id, - name=repo.name, - namespace_name=repo.namespace_user.username, - description=repo.description, - is_public=model.repository.is_repository_public(repo) - ) - - @classmethod - def get_repository(cls, namespace_name, repo_name): + def get_repository(self, namespace_name, repo_name): repo = model.repository.get_repository(namespace_name, repo_name) if repo is None: return None - return cls._repository_for_repo(repo) + return _repository_for_repo(repo) - @classmethod - def has_active_tag(cls, namespace_name, repo_name, tag_name): + def has_active_tag(self, namespace_name, repo_name, tag_name): try: model.tag.get_active_tag(namespace_name, repo_name, tag_name) return True except database.RepositoryTag.DoesNotExist: return False - @classmethod - def get_manifest_by_tag(cls, namespace_name, repo_name, tag_name): + def get_manifest_by_tag(self, namespace_name, repo_name, tag_name): try: manifest = model.tag.load_tag_manifest(namespace_name, repo_name, tag_name) return ManifestJSON(digest=manifest.digest, json=manifest.json_data, media_type=_MEDIA_TYPE) except model.InvalidManifestException: return None - @classmethod - def get_manifest_by_digest(cls, namespace_name, repo_name, digest): + def get_manifest_by_digest(self, namespace_name, repo_name, digest): try: manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, digest) return ManifestJSON(digest=digest, json=manifest.json_data, media_type=_MEDIA_TYPE) except model.InvalidManifestException: return None - @classmethod - def delete_manifest_by_digest(cls, namespace_name, repo_name, digest): + def delete_manifest_by_digest(self, namespace_name, repo_name, digest): def _tag_view(tag): return Tag( name=tag.name, @@ -339,56 +315,31 @@ class PreOCIModel(DockerRegistryV2DataInterface): tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, digest) return [_tag_view(tag) for tag in tags] - @classmethod - def _docker_v1_metadata(cls, namespace_name, repo_name, repo_image): - """ - Returns a DockerV1Metadata object for the given image under the repository with the given - namespace and name. Note that the namespace and name are passed here as an optimization, and are - *not checked* against the image. 
- """ - return DockerV1Metadata( - namespace_name=namespace_name, - repo_name=repo_name, - image_id=repo_image.docker_image_id, - checksum=repo_image.v1_checksum, - content_checksum=repo_image.storage.content_checksum, - compat_json=repo_image.v1_json_metadata, - created=repo_image.created, - comment=repo_image.comment, - command=repo_image.command, - # TODO: make sure this isn't needed anywhere, as it is expensive to lookup - parent_image_id=None, - ) - - @classmethod - def get_docker_v1_metadata_by_tag(cls, namespace_name, repo_name, tag_name): + def get_docker_v1_metadata_by_tag(self, namespace_name, repo_name, tag_name): try: repo_img = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) - return cls._docker_v1_metadata(namespace_name, repo_name, repo_img) + return _docker_v1_metadata(namespace_name, repo_name, repo_img) except DataModelException: return None - @classmethod - def get_docker_v1_metadata_by_image_id(cls, namespace_name, repo_name, docker_image_ids): + def get_docker_v1_metadata_by_image_id(self, namespace_name, repo_name, docker_image_ids): repo = model.repository.get_repository(namespace_name, repo_name) if repo is None: return {} images_query = model.image.lookup_repository_images(repo, docker_image_ids) - return {image.docker_image_id: cls._docker_v1_metadata(namespace_name, repo_name, image) + return {image.docker_image_id: _docker_v1_metadata(namespace_name, repo_name, image) for image in images_query} - @classmethod - def get_parents_docker_v1_metadata(cls, namespace_name, repo_name, docker_image_id): + def get_parents_docker_v1_metadata(self, namespace_name, repo_name, docker_image_id): repo_image = model.image.get_repo_image(namespace_name, repo_name, docker_image_id) if repo_image is None: return [] parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) - return [cls._docker_v1_metadata(namespace_name, repo_name, image) for image in parents] + return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents] - @classmethod - def create_manifest_and_update_tag(cls, namespace_name, repo_name, tag_name, manifest_digest, + def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest_digest, manifest_bytes): try: model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, @@ -397,8 +348,7 @@ class PreOCIModel(DockerRegistryV2DataInterface): # It's already there! 
pass

-  @classmethod
-  def synthesize_v1_image(cls, repository, storage, image_id, created, comment, command,
+  def synthesize_v1_image(self, repository, storage, image_id, created, comment, command,
                           compat_json, parent_image_id):
     repo = model.repository.get_repository(repository.namespace_name, repository.name)
     if repo is None:
@@ -417,18 +367,16 @@ class PreOCIModel(DockerRegistryV2DataInterface):
     repo_image = model.image.synthesize_v1_image(repo, storage_obj, image_id, created, comment,
                                                  command, compat_json, parent_image)
-    return cls._docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image)
+    return _docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image)

-  @classmethod
-  def save_manifest(cls, namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest,
-                    manifest_bytes):
+  def save_manifest(self, namespace_name, repo_name, tag_name, leaf_layer_docker_id,
+                    manifest_digest, manifest_bytes):
     (_, newly_created) = model.tag.store_tag_manifest(namespace_name, repo_name, tag_name,
                                                       leaf_layer_docker_id, manifest_digest,
                                                       manifest_bytes)
     return newly_created

-  @classmethod
-  def repository_tags(cls, namespace_name, repo_name, limit, offset):
+  def repository_tags(self, namespace_name, repo_name, limit, offset):
     def _tag_view(tag):
       return Tag(
         name=tag.name,
@@ -443,15 +391,12 @@ class PreOCIModel(DockerRegistryV2DataInterface):
     tags_query = tags_query.limit(limit).offset(offset)
     return [_tag_view(tag) for tag in tags_query]

-  @classmethod
-  def get_visible_repositories(cls, username, limit, offset):
+  def get_visible_repositories(self, username, limit, offset):
     query = model.repository.get_visible_repositories(username, include_public=(username is None))
     query = query.limit(limit).offset(offset)
-    return [cls._repository_for_repo(repo) for repo in query]
+    return [_repository_for_repo(repo) for repo in query]

-  @classmethod
-  def create_blob_upload(cls, namespace_name, repo_name, upload_uuid, location_name,
-                         storage_metadata):
+  def create_blob_upload(self, namespace_name, repo_name, upload_uuid, location_name,
+                         storage_metadata):
     try:
       model.blob.initiate_upload(namespace_name, repo_name, upload_uuid, location_name,
                                  storage_metadata)
@@ -459,8 +404,7 @@ class PreOCIModel(DockerRegistryV2DataInterface):
     except database.Repository.DoesNotExist:
       return False

-  @classmethod
-  def blob_upload_by_uuid(cls, namespace_name, repo_name, upload_uuid):
+  def blob_upload_by_uuid(self, namespace_name, repo_name, upload_uuid):
     try:
       found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid)
     except model.InvalidBlobUpload:
@@ -480,8 +424,7 @@ class PreOCIModel(DockerRegistryV2DataInterface):
       storage_metadata=found.storage_metadata,
     )

-  @classmethod
-  def update_blob_upload(cls, blob_upload):
+  def update_blob_upload(self, blob_upload):
     # Lookup the blob upload object.
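     # If the UUID matches an existing upload record, each mutable field from the
     # given blob_upload (byte counts, piece hashes, SHA state, storage metadata)
     # is copied onto the record before it is saved.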
try: blob_upload_record = model.blob.get_blob_upload(blob_upload.repo_namespace_name, @@ -498,21 +441,18 @@ class PreOCIModel(DockerRegistryV2DataInterface): blob_upload_record.sha_state = blob_upload.sha_state blob_upload_record.save() - @classmethod - def delete_blob_upload(cls, namespace_name, repo_name, uuid): + def delete_blob_upload(self, namespace_name, repo_name, uuid): try: found = model.blob.get_blob_upload(namespace_name, repo_name, uuid) found.delete_instance() except model.InvalidBlobUpload: return - @classmethod - def create_blob_and_temp_tag(cls, namespace_name, repo_name, blob_digest, blob_upload, + def create_blob_and_temp_tag(self, namespace_name, repo_name, blob_digest, blob_upload, expiration_sec): location_obj = model.storage.get_image_location_for_name(blob_upload.location_name) blob_record = model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, - blob_digest, - location_obj.id, + blob_digest, location_obj.id, blob_upload.byte_count, expiration_sec, blob_upload.uncompressed_byte_count) @@ -523,8 +463,7 @@ class PreOCIModel(DockerRegistryV2DataInterface): locations=[blob_upload.location_name], ) - @classmethod - def lookup_blobs_by_digest(cls, namespace_name, repo_name, digests): + def lookup_blobs_by_digest(self, namespace_name, repo_name, digests): def _blob_view(blob_record): return Blob( uuid=blob_record.uuid, @@ -539,8 +478,7 @@ class PreOCIModel(DockerRegistryV2DataInterface): query = model.storage.lookup_repo_storages_by_content_checksum(repo, digests) return {storage.content_checksum: _blob_view(storage) for storage in query} - @classmethod - def get_blob_by_digest(cls, namespace_name, repo_name, digest): + def get_blob_by_digest(self, namespace_name, repo_name, digest): try: blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) return Blob( @@ -552,13 +490,11 @@ class PreOCIModel(DockerRegistryV2DataInterface): except model.BlobDoesNotExist: return None - @classmethod - def save_bittorrent_pieces(cls, blob, piece_size, piece_bytes): + def save_bittorrent_pieces(self, blob, piece_size, piece_bytes): blob_record = model.storage.get_storage_by_uuid(blob.uuid) model.storage.save_torrent_info(blob_record, piece_size, piece_bytes) - @classmethod - def create_manifest_labels(cls, namespace_name, repo_name, manifest_digest, labels): + def create_manifest_labels(self, namespace_name, repo_name, manifest_digest, labels): if not labels: # No point in doing anything more. return @@ -568,7 +504,42 @@ class PreOCIModel(DockerRegistryV2DataInterface): model.label.create_manifest_label(tag_manifest, label.key, label.value, label.source_type, label.media_type) - @classmethod - def get_blob_path(cls, blob): + def get_blob_path(self, blob): blob_record = model.storage.get_storage_by_uuid(blob.uuid) return model.storage.get_layer_path(blob_record) + + +def _docker_v1_metadata(namespace_name, repo_name, repo_image): + """ + Returns a DockerV1Metadata object for the given Pre-OCI repo_image under the + repository with the given namespace and name. Note that the namespace and + name are passed here as an optimization, and are *not checked* against the + image. 
+ """ + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=repo_image.docker_image_id, + checksum=repo_image.v1_checksum, + content_checksum=repo_image.storage.content_checksum, + compat_json=repo_image.v1_json_metadata, + created=repo_image.created, + comment=repo_image.comment, + command=repo_image.command, + # TODO: make sure this isn't needed anywhere, as it is expensive to lookup + parent_image_id=None, + ) + + +def _repository_for_repo(repo): + """ Returns a Repository object representing the Pre-OCI data model repo instance given. """ + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) + + +pre_oci_model = PreOCIModel() diff --git a/data/interfaces/verbs.py b/data/interfaces/verbs.py index 826b1729b..f5758352e 100644 --- a/data/interfaces/verbs.py +++ b/data/interfaces/verbs.py @@ -1,8 +1,13 @@ +import json + +from abc import ABCMeta, abstractmethod from collections import namedtuple + +from six import add_metaclass + from data import model from image.docker.v1 import DockerV1Metadata -import json class DerivedImage(namedtuple('DerivedImage', ['ref', 'blob', 'internal_source_image_db_id'])): """ @@ -31,105 +36,106 @@ class TorrentInfo(namedtuple('TorrentInfo', ['piece_length', 'pieces'])): """ +@add_metaclass(ABCMeta) class VerbsDataInterface(object): """ Interface that represents all data store interactions required by the registry's custom HTTP verbs. """ - @classmethod - def repository_is_public(cls, namespace_name, repo_name): + @abstractmethod + def repository_is_public(self, namespace_name, repo_name): """ Returns a boolean for whether the repository with the given name and namespace is public. """ - raise NotImplementedError() + pass - @classmethod - def get_manifest_layers_with_blobs(cls, repo_image): + @abstractmethod + def get_manifest_layers_with_blobs(self, repo_image): """ Returns the full set of manifest layers and their associated blobs starting at the given repository image and working upwards to the root image. """ - raise NotImplementedError() + pass - @classmethod - def get_blob_path(cls, blob): + @abstractmethod + def get_blob_path(self, blob): """ Returns the storage path for the given blob. """ - raise NotImplementedError() + pass - @classmethod - def get_derived_image_signature(cls, derived_image, signer_name): + @abstractmethod + def get_derived_image_signature(self, derived_image, signer_name): """ Returns the signature associated with the derived image and a specific signer or None if none. """ - raise NotImplementedError() + pass - @classmethod - def set_derived_image_signature(cls, derived_image, signer_name, signature): + @abstractmethod + def set_derived_image_signature(self, derived_image, signer_name, signature): """ Sets the calculated signature for the given derived image and signer to that specified. """ - raise NotImplementedError() + pass - @classmethod - def delete_derived_image(cls, derived_image): + @abstractmethod + def delete_derived_image(self, derived_image): """ Deletes a derived image and all of its storage. """ - raise NotImplementedError() + pass - @classmethod - def set_blob_size(cls, blob, size): + @abstractmethod + def set_blob_size(self, blob, size): """ Sets the size field on a blob to the value specified. 
""" - raise NotImplementedError() + pass - @classmethod - def get_repo_blob_by_digest(cls, namespace_name, repo_name, digest): + @abstractmethod + def get_repo_blob_by_digest(self, namespace_name, repo_name, digest): """ Returns the blob with the given digest under the matching repository or None if none. """ - raise NotImplementedError() + pass - @classmethod - def get_torrent_info(cls, blob): + @abstractmethod + def get_torrent_info(self, blob): """ Returns the torrent information associated with the given blob or None if none. """ - raise NotImplementedError() + pass - @classmethod - def set_torrent_info(cls, blob, piece_length, pieces): + @abstractmethod + def set_torrent_info(self, blob, piece_length, pieces): """ Sets the torrent infomation associated with the given blob to that specified. """ - raise NotImplementedError() + pass - @classmethod - def lookup_derived_image(cls, repo_image, verb, varying_metadata=None): + @abstractmethod + def lookup_derived_image(self, repo_image, verb, varying_metadata=None): """ Looks up the derived image for the given repository image, verb and optional varying metadata and returns it or None if none. """ - raise NotImplementedError() + pass - @classmethod - def lookup_or_create_derived_image(cls, repo_image, verb, location, varying_metadata=None): + @abstractmethod + def lookup_or_create_derived_image(self, repo_image, verb, location, varying_metadata=None): """ Looks up the derived image for the given repository image, verb and optional varying metadata and returns it. If none exists, a new derived image is created. """ - raise NotImplementedError() + pass - @classmethod - def get_tag_image(cls, namespace_name, repo_name, tag_name): + @abstractmethod + def get_tag_image(self, namespace_name, repo_name, tag_name): """ Returns the image associated with the live tag with the given name under the matching repository or None if none. """ - raise NotImplementedError() + pass class PreOCIModel(VerbsDataInterface): @@ -138,35 +144,10 @@ class PreOCIModel(VerbsDataInterface): before it was changed to support the OCI specification. """ - @classmethod - def repository_is_public(cls, namespace_name, repo_name): + def repository_is_public(self, namespace_name, repo_name): return model.repository.repository_is_public(namespace_name, repo_name) - @classmethod - def _docker_v1_metadata(cls, namespace_name, repo_name, repo_image): - """ - Returns a DockerV1Metadata object for the given image under the repository with the given - namespace and name. Note that the namespace and name are passed here as an optimization, and are - *not checked* against the image. Also note that we only fill in the localized data needed by - verbs. - """ - return DockerV1Metadata( - namespace_name=namespace_name, - repo_name=repo_name, - image_id=repo_image.docker_image_id, - checksum=repo_image.v1_checksum, - compat_json=repo_image.v1_json_metadata, - created=repo_image.created, - comment=repo_image.comment, - command=repo_image.command, - - # Note: These are not needed in verbs and are expensive to load, so we just skip them. 
- content_checksum=None, - parent_image_id=None, - ) - - @classmethod - def get_manifest_layers_with_blobs(cls, repo_image): + def get_manifest_layers_with_blobs(self, repo_image): repo_image_record = model.image.get_image_by_id(repo_image.repository.namespace_name, repo_image.repository.name, repo_image.image_id) @@ -186,16 +167,15 @@ class PreOCIModel(VerbsDataInterface): yield ImageWithBlob( image_id=parent.docker_image_id, - blob=cls._blob(parent.storage), + blob=_blob(parent.storage), repository=repo_image.repository, compat_metadata=metadata, - v1_metadata=cls._docker_v1_metadata(repo_image.repository.namespace_name, - repo_image.repository.name, parent), + v1_metadata=_docker_v1_metadata(repo_image.repository.namespace_name, + repo_image.repository.name, parent), internal_db_id=parent.id, ) - @classmethod - def get_derived_image_signature(cls, derived_image, signer_name): + def get_derived_image_signature(self, derived_image, signer_name): storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) signature_entry = model.storage.lookup_storage_signature(storage, signer_name) if signature_entry is None: @@ -203,41 +183,35 @@ class PreOCIModel(VerbsDataInterface): return signature_entry.signature - @classmethod - def set_derived_image_signature(cls, derived_image, signer_name, signature): + def set_derived_image_signature(self, derived_image, signer_name, signature): storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name) signature_entry.signature = signature signature_entry.uploading = False signature_entry.save() - @classmethod - def delete_derived_image(cls, derived_image): + def delete_derived_image(self, derived_image): model.image.delete_derived_storage_by_uuid(derived_image.blob.uuid) - @classmethod - def set_blob_size(cls, blob, size): + def set_blob_size(self, blob, size): storage_entry = model.storage.get_storage_by_uuid(blob.uuid) storage_entry.image_size = size storage_entry.uploading = False storage_entry.save() - @classmethod - def get_blob_path(cls, blob): + def get_blob_path(self, blob): blob_record = model.storage.get_storage_by_uuid(blob.uuid) return model.storage.get_layer_path(blob_record) - @classmethod - def get_repo_blob_by_digest(cls, namespace_name, repo_name, digest): + def get_repo_blob_by_digest(self, namespace_name, repo_name, digest): try: blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) except model.BlobDoesNotExist: return None - return cls._blob(blob_record) + return _blob(blob_record) - @classmethod - def get_torrent_info(cls, blob): + def get_torrent_info(self, blob): blob_record = model.storage.get_storage_by_uuid(blob.uuid) try: @@ -250,51 +224,24 @@ class PreOCIModel(VerbsDataInterface): piece_length=torrent_info.piece_length, ) - @classmethod - def set_torrent_info(cls, blob, piece_length, pieces): + def set_torrent_info(self, blob, piece_length, pieces): blob_record = model.storage.get_storage_by_uuid(blob.uuid) model.storage.save_torrent_info(blob_record, piece_length, pieces) - @classmethod - def lookup_derived_image(cls, repo_image, verb, varying_metadata=None): + def lookup_derived_image(self, repo_image, verb, varying_metadata=None): blob_record = model.image.find_derived_storage_for_image(repo_image.internal_db_id, verb, varying_metadata) if blob_record is None: return None - return cls._derived_image(blob_record, repo_image) + return _derived_image(blob_record, repo_image) - @classmethod - def 
_derived_image(cls, blob_record, repo_image): - return DerivedImage( - ref=repo_image.internal_db_id, - blob=cls._blob(blob_record), - internal_source_image_db_id=repo_image.internal_db_id, - ) + def lookup_or_create_derived_image(self, repo_image, verb, location, varying_metadata=None): + blob_record = model.image.find_or_create_derived_storage(repo_image.internal_db_id, verb, + location, varying_metadata) + return _derived_image(blob_record, repo_image) - @classmethod - def _blob(cls, blob_record): - if hasattr(blob_record, 'locations'): - locations = blob_record.locations - else: - locations = model.storage.get_storage_locations(blob_record.uuid) - - return Blob( - uuid=blob_record.uuid, - size=blob_record.image_size, - uncompressed_size=blob_record.uncompressed_size, - uploading=blob_record.uploading, - locations=locations, - ) - - @classmethod - def lookup_or_create_derived_image(cls, repo_image, verb, location, varying_metadata=None): - blob_record = model.image.find_or_create_derived_storage(repo_image.internal_db_id, verb, location, - varying_metadata) - return cls._derived_image(blob_record, repo_image) - - @classmethod - def get_tag_image(cls, namespace_name, repo_name, tag_name): + def get_tag_image(self, namespace_name, repo_name, tag_name): try: found = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) except model.DataModelException: @@ -308,15 +255,68 @@ class PreOCIModel(VerbsDataInterface): return ImageWithBlob( image_id=found.docker_image_id, - blob=cls._blob(found.storage), + blob=_blob(found.storage), repository=RepositoryReference( namespace_name=namespace_name, name=repo_name, id=found.repository_id, ), compat_metadata=metadata, - v1_metadata=cls._docker_v1_metadata(namespace_name, repo_name, found), + v1_metadata=_docker_v1_metadata(namespace_name, repo_name, found), internal_db_id=found.id, ) +pre_oci_model = PreOCIModel() + + +def _docker_v1_metadata(namespace_name, repo_name, repo_image): + """ + Returns a DockerV1Metadata object for the given Pre-OCI repo_image under the + repository with the given namespace and name. Note that the namespace and + name are passed here as an optimization, and are *not checked* against the + image. + """ + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=repo_image.docker_image_id, + checksum=repo_image.v1_checksum, + compat_json=repo_image.v1_json_metadata, + created=repo_image.created, + comment=repo_image.comment, + command=repo_image.command, + + # Note: These are not needed in verbs and are expensive to load, so we just skip them. + content_checksum=None, + parent_image_id=None, + ) + + +def _derived_image(blob_record, repo_image): + """ + Returns a DerivedImage object for the given Pre-OCI data model blob and repo_image instance. + """ + return DerivedImage( + ref=repo_image.internal_db_id, + blob=_blob(blob_record), + internal_source_image_db_id=repo_image.internal_db_id, + ) + + +def _blob(blob_record): + """ + Returns a Blob object for the given Pre-OCI data model blob instance. 
+ """ + if hasattr(blob_record, 'locations'): + locations = blob_record.locations + else: + locations = model.storage.get_storage_locations(blob_record.uuid) + + return Blob( + uuid=blob_record.uuid, + size=blob_record.image_size, + uncompressed_size=blob_record.uncompressed_size, + uploading=blob_record.uploading, + locations=locations, + ) diff --git a/endpoints/keyserver/__init__.py b/endpoints/keyserver/__init__.py index b5e74b171..7cab60b40 100644 --- a/endpoints/keyserver/__init__.py +++ b/endpoints/keyserver/__init__.py @@ -5,7 +5,7 @@ from flask import Blueprint, jsonify, abort, request, make_response from jwt import get_unverified_header from app import app -from data.interfaces.key_server import PreOCIModel as model, ServiceKeyDoesNotExist +from data.interfaces.key_server import pre_oci_model as model, ServiceKeyDoesNotExist from data.model.log import log_action from util.security import jwtutil diff --git a/endpoints/v1/index.py b/endpoints/v1/index.py index c90ffc9d5..454b95c78 100644 --- a/endpoints/v1/index.py +++ b/endpoints/v1/index.py @@ -6,7 +6,7 @@ from functools import wraps from flask import request, make_response, jsonify, session -from data.interfaces.v1 import PreOCIModel as model +from data.interfaces.v1 import pre_oci_model as model from app import authentication, userevents, metric_queue from auth.auth import process_auth, generate_signed_token from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 2e0aa85bc..2e9a92c09 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -14,7 +14,7 @@ from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from auth.registry_jwt_auth import get_granted_username from data import model, database -from data.interfaces.v1 import PreOCIModel as model +from data.interfaces.v1 import pre_oci_model as model from digest import checksums from endpoints.v1 import v1_bp from endpoints.decorators import anon_protect diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index 07902309d..973464683 100644 --- a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -9,7 +9,7 @@ from auth.auth import process_auth from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from data import model -from data.interfaces.v1 import PreOCIModel as model +from data.interfaces.v1 import pre_oci_model as model from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v1 import v1_bp diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index 1e1a2bd30..79113bad0 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -8,7 +8,7 @@ import resumablehashlib from app import storage, app from auth.registry_jwt_auth import process_registry_jwt_auth from data import database -from data.interfaces.v2 import PreOCIModel as model +from data.interfaces.v2 import pre_oci_model as model from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream diff --git a/endpoints/v2/catalog.py b/endpoints/v2/catalog.py index 4a145fe3f..8ae243460 100644 --- a/endpoints/v2/catalog.py +++ b/endpoints/v2/catalog.py @@ -3,7 +3,7 @@ from flask import jsonify from auth.registry_jwt_auth import process_registry_jwt_auth, get_granted_entity from endpoints.decorators import anon_protect from endpoints.v2 import v2_bp, paginate -from 
data.interfaces.v2 import PreOCIModel as model +from data.interfaces.v2 import pre_oci_model as model @v2_bp.route('/_catalog', methods=['GET']) @process_registry_jwt_auth() diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 2cc716103..ff3987620 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -8,7 +8,7 @@ import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth -from data.interfaces.v2 import PreOCIModel as model, Label +from data.interfaces.v2 import pre_oci_model as model, Label from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py index 6f2180b7c..6b1ce20ad 100644 --- a/endpoints/v2/tag.py +++ b/endpoints/v2/tag.py @@ -5,7 +5,7 @@ from endpoints.common import parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, paginate from endpoints.v2.errors import NameUnknown from endpoints.decorators import anon_protect -from data.interfaces.v2 import PreOCIModel as model +from data.interfaces.v2 import pre_oci_model as model @v2_bp.route('//tags/list', methods=['GET']) @parse_repository_name() diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py index 14e0b93fa..83445fd81 100644 --- a/endpoints/v2/v2auth.py +++ b/endpoints/v2/v2auth.py @@ -11,7 +11,7 @@ from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermissi CreateRepositoryPermission) from endpoints.v2 import v2_bp from endpoints.decorators import anon_protect -from data.interfaces.v2 import PreOCIModel as model +from data.interfaces.v2 import pre_oci_model as model from util.cache import no_cache from util.names import parse_namespace_repository, REPOSITORY_NAME_REGEX from util.security.registry_jwt import generate_bearer_token, build_context_and_subject diff --git a/endpoints/verbs/__init__.py b/endpoints/verbs/__init__.py index 30f41a9ae..fadf63bbc 100644 --- a/endpoints/verbs/__init__.py +++ b/endpoints/verbs/__init__.py @@ -10,7 +10,7 @@ from auth.auth import process_auth from auth.auth_context import get_authenticated_user from auth.permissions import ReadRepositoryPermission from data import database -from data.interfaces.verbs import PreOCIModel as model +from data.interfaces.verbs import pre_oci_model as model from endpoints.common import route_show_if, parse_repository_name from endpoints.decorators import anon_protect from endpoints.trackhelper import track_and_log
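
Taken together, each data/interfaces module in this series ends up with the same shape: an abstract base class declared via six.add_metaclass(ABCMeta) with @abstractmethod methods, a concrete PreOCIModel implementation, and a module-level pre_oci_model singleton that the endpoint blueprints import under the name `model`. A minimal, self-contained sketch of that shape (GreeterDataInterface, PreOCIGreeter and pre_oci_greeter are illustrative names, not code from this series):

from abc import ABCMeta, abstractmethod

from six import add_metaclass


@add_metaclass(ABCMeta)
class GreeterDataInterface(object):
  """
  Interface representing the data store interactions required by a hypothetical endpoint.
  """

  @abstractmethod
  def greet(self, name):
    """ Returns a greeting for the given name. """
    pass


class PreOCIGreeter(GreeterDataInterface):
  """ Implementation of GreeterDataInterface backed by the pre-OCI data model. """

  def greet(self, name):
    return 'hello, %s' % name


# Module-level singleton; an endpoint blueprint would import it as
# `from data.interfaces.greeter import pre_oci_greeter as model`.
pre_oci_greeter = PreOCIGreeter()

# Unlike the old classmethods raising NotImplementedError at call time, an
# implementation that forgets an @abstractmethod now fails as soon as it is
# instantiated:
#
#   class Broken(GreeterDataInterface):
#     pass
#
#   Broken()  # TypeError: Can't instantiate abstract class Broken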