diff --git a/data/database.py b/data/database.py index 935fad2a6..d93bba30f 100644 --- a/data/database.py +++ b/data/database.py @@ -1,3 +1,5 @@ +# pylint: disable=old-style-class,no-init + import inspect import logging import string @@ -9,7 +11,6 @@ from collections import defaultdict from datetime import datetime from random import SystemRandom -import resumablehashlib import toposort from enum import Enum @@ -18,6 +19,8 @@ from playhouse.shortcuts import RetryOperationalError from sqlalchemy.engine.url import make_url +import resumablehashlib + from data.fields import ResumableSHA256Field, ResumableSHA1Field, JSONField, Base64BinaryField from data.read_slave import ReadSlaveModel from util.names import urn_generator @@ -115,11 +118,11 @@ def delete_instance_filtered(instance, model_class, delete_nullable, skip_transi with db_transaction(): for query, fk in filtered_ops: - model = fk.model_class + _model = fk.model_class if fk.null and not delete_nullable: - model.update(**{fk.name: None}).where(query).execute() + _model.update(**{fk.name: None}).where(query).execute() else: - model.delete().where(query).execute() + _model.delete().where(query).execute() return instance.delete().where(instance._pk_expr()).execute() @@ -144,12 +147,12 @@ class CloseForLongOperation(object): self.config_object = config_object def __enter__(self): - if self.config_object.get('TESTING') == True: + if self.config_object.get('TESTING') is True: return close_db_filter(None) - def __exit__(self, type, value, traceback): + def __exit__(self, typ, value, traceback): # Note: Nothing to do. The next SQL call will reconnect automatically. pass @@ -163,7 +166,7 @@ class UseThenDisconnect(object): def __enter__(self): configure(self.config_object) - def __exit__(self, type, value, traceback): + def __exit__(self, typ, value, traceback): close_db_filter(None) @@ -279,6 +282,7 @@ def uuid_generator(): get_epoch_timestamp = lambda: int(time.time()) +get_epoch_timestamp_ms = lambda: int(time.time() * 1000) def close_db_filter(_): @@ -295,7 +299,7 @@ class QuayUserField(ForeignKeyField): def __init__(self, allows_robots=False, robot_null_delete=False, *args, **kwargs): self.allows_robots = allows_robots self.robot_null_delete = robot_null_delete - if not 'rel_model' in kwargs: + if 'rel_model' not in kwargs: kwargs['rel_model'] = User super(QuayUserField, self).__init__(*args, **kwargs) @@ -341,12 +345,12 @@ class User(BaseModel): # For all the model dependencies, only delete those that allow robots. for query, fk in reversed(list(self.dependencies(search_nullable=True))): if isinstance(fk, QuayUserField) and fk.allows_robots: - model = fk.model_class + _model = fk.model_class if fk.robot_null_delete: - model.update(**{fk.name: None}).where(query).execute() + _model.update(**{fk.name: None}).where(query).execute() else: - model.delete().where(query).execute() + _model.delete().where(query).execute() # Delete the instance itself. 
super(User, self).delete_instance(recursive=False, delete_nullable=False) @@ -494,7 +498,7 @@ class PermissionPrototype(BaseModel): uuid = CharField(default=uuid_generator) activating_user = QuayUserField(allows_robots=True, index=True, null=True, related_name='userpermissionproto') - delegate_user = QuayUserField(allows_robots=True,related_name='receivingpermission', + delegate_user = QuayUserField(allows_robots=True, related_name='receivingpermission', null=True) delegate_team = ForeignKeyField(Team, related_name='receivingpermission', null=True) @@ -989,5 +993,209 @@ class TagManifestLabel(BaseModel): ) +''' + +class ManifestLabel(BaseModel): + repository = ForeignKeyField(Repository, index=True) + annotated = ForeignKeyField(Manifest, index=True) + label = ForeignKeyField(Label) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('repository', 'annotated', 'label'), True), + ) + + +class Blob(BaseModel): + """ Blob represents a content-addressable object stored outside of the database. """ + digest = CharField(index=True, unique=True) + media_type = ForeignKeyField(MediaType) + size = BigIntegerField() + uncompressed_size = BigIntegerField(null=True) + + +class BlobPlacementLocation(BaseModel): + """ BlobPlacementLocation is an enumeration of the possible storage locations for Blobs. """ + name = CharField(index=True, unique=True) + + +class BlobPlacementLocationPreference(BaseModel): + """ BlobPlacementLocationPreference is a location to which a user's data will be replicated. """ + user = QuayUserField(index=True, allows_robots=False) + location = ForeignKeyField(BlobPlacementLocation) + + +class BlobPlacement(BaseModel): + """ BlobPlacement represents the location of a Blob. """ + blob = ForeignKeyField(Blob) + location = ForeignKeyField(BlobPlacementLocation) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('blob', 'location'), True), + ) + + +class BlobUploading(BaseModel): + """ BlobUploading represents the state of a Blob currently being uploaded. """ + uuid = CharField(index=True, unique=True) + created = DateTimeField(default=datetime.now, index=True) + repository = ForeignKeyField(Repository, index=True) + location = ForeignKeyField(BlobPlacementLocation) + byte_count = IntegerField(default=0) + uncompressed_byte_count = IntegerField(null=True) + chunk_count = IntegerField(default=0) + storage_metadata = JSONField(null=True, default={}) + sha_state = ResumableSHA256Field(null=True, default=resumablehashlib.sha256) + piece_sha_state = ResumableSHA1Field(null=True) + piece_hashes = Base64BinaryField(null=True) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('repository', 'uuid'), True), + ) + + +class Manifest(BaseModel): + """ Manifest represents the metadata and collection of blobs that comprise a container image. """ + digest = CharField(index=True, unique=True) + media_type = ForeignKeyField(MediaType) + manifest_json = JSONField() + + +class ManifestBlob(BaseModel): + """ ManifestBlob is a many-to-many relation table linking Manifests and Blobs. """ + manifest = ForeignKeyField(Manifest, index=True) + blob = ForeignKeyField(Blob, index=True) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest', 'blob'), True), + ) + + +class ManifestList(BaseModel): + """ ManifestList represents all of the various manifests that compose a Tag. 
""" + digest = CharField(index=True, unique=True) + manifest_list_json = JSONField() + schema_version = CharField() + media_type = ForeignKeyField(MediaType) + + +class ManifestListManifest(BaseModel): + """ ManifestListManifest is a many-to-many relation table linking ManifestLists and Manifests. """ + manifest_list = ForeignKeyField(ManifestList, index=True) + manifest = ForeignKeyField(Manifest, index=True) + operating_system = CharField() + architecture = CharField() + platform_json = JSONField() + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest_list', 'operating_system', 'architecture'), False), + ) + + +class ManifestLayer(BaseModel): + """ ManifestLayer represents one of the layers that compose a Manifest. """ + blob = ForeignKeyField(Blob, index=True) + manifest = ForeignKeyField(Manifest) + manifest_index = IntegerField(index=True) # index 0 is the last command in a Dockerfile + metadata_json = JSONField() + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest', 'manifest_index'), True), + ) + + +class ManifestLayerDockerV1(BaseModel): + """ ManifestLayerDockerV1 is the Docker v1 registry protocol metadata for a ManifestLayer. """ + manifest_layer = ForeignKeyField(ManifestLayer) + image_id = CharField(index=True) + checksum = CharField() + compat_json = JSONField() + + +class ManifestLayerScan(BaseModel): + """ ManifestLayerScan represents the state of security scanning for a ManifestLayer. """ + layer = ForeignKeyField(ManifestLayer, unique=True) + scannable = BooleanField() + scanned_by = CharField() + + +class DerivedImage(BaseModel): + """ DerivedImage represents a Manifest transcoded into an alternative format. """ + uuid = CharField(default=uuid_generator, unique=True) + source_manifest = ForeignKeyField(Manifest) + derived_manifest_json = JSONField() + media_type = ForeignKeyField(MediaType) + blob = ForeignKeyField(Blob, related_name='blob') + uniqueness_hash = CharField(index=True, unique=True) + signature_blob = ForeignKeyField(Blob, null=True, related_name='signature_blob') + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('source_manifest', 'blob'), True), + (('source_manifest', 'media_type', 'uniqueness_hash'), True), + ) + + +class Tag(BaseModel): + """ Tag represents a user-facing alias for referencing a ManifestList. """ + name = CharField() + repository = ForeignKeyField(Repository) + manifest_list = ForeignKeyField(ManifestList) + lifetime_start = BigIntegerField(default=get_epoch_timestamp_ms) + lifetime_end = BigIntegerField(null=True, index=True) + hidden = BooleanField(default=False) + reverted = BooleanField(default=False) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('repository', 'name'), False), + (('repository', 'name', 'hidden') False), + + # This unique index prevents deadlocks when concurrently moving and deleting tags + (('repository', 'name', 'lifetime_end'), True), + ) + + +class BitTorrentPieces(BaseModel): + """ BitTorrentPieces represents the BitTorrent piece metadata calculated from a Blob. 
""" + blob = ForeignKeyField(Blob) + pieces = Base64BinaryField() + piece_length = IntegerField() + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('blob', 'piece_length'), True), + ) + + +beta_classes = set([ManifestLayerScan, Tag, BlobPlacementLocation, ManifestLayer, ManifestList, + BitTorrentPieces, MediaType, Label, ManifestBlob, BlobUploading, Blob, + ManifestLayerDockerV1, BlobPlacementLocationPreference, ManifestListManifest, + Manifest, DerivedImage, BlobPlacement]) +''' + is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)] diff --git a/formats/__init__.py b/data/interfaces/__init__.py similarity index 100% rename from formats/__init__.py rename to data/interfaces/__init__.py diff --git a/data/interfaces/key_server.py b/data/interfaces/key_server.py new file mode 100644 index 000000000..b9b6d324b --- /dev/null +++ b/data/interfaces/key_server.py @@ -0,0 +1,124 @@ +from abc import ABCMeta, abstractmethod +from collections import namedtuple + +from six import add_metaclass + +import data.model + + +class ServiceKey(namedtuple('ServiceKey', ['name', 'kid', 'service', 'jwk', 'metadata', + 'created_date', 'expiration_date', 'rotation_duration', + 'approval'])): + """ + Service Key represents a public key (JWK) being used by an instance of a particular service to + authenticate with other services. + """ + pass + + +class ServiceKeyException(Exception): + pass + + +class ServiceKeyDoesNotExist(ServiceKeyException): + pass + + +# TODO(jzelinskie): maybe make this interface support superuser API +@add_metaclass(ABCMeta) +class KeyServerDataInterface(object): + """ + Interface that represents all data store interactions required by a JWT key service. + """ + + @abstractmethod + def list_service_keys(self, service): + """ + Returns a list of service keys or an empty list if the service does not exist. + """ + pass + + @abstractmethod + def get_service_key(self, signer_kid, service=None, alive_only=None, approved_only=None): + """ + Returns a service kid with the given kid or raises ServiceKeyNotFound. + """ + pass + + @abstractmethod + def create_service_key(self, name, kid, service, jwk, metadata, expiration_date, + rotation_duration=None): + """ + Stores a service key. + """ + pass + + @abstractmethod + def replace_service_key(self, old_kid, kid, jwk, metadata, expiration_date): + """ + Replaces a service with a new key or raises ServiceKeyNotFound. + """ + pass + + @abstractmethod + def delete_service_key(self, kid): + """ + Deletes and returns a service key with the given kid or raises ServiceKeyNotFound. + """ + pass + + +class PreOCIModel(KeyServerDataInterface): + """ + PreOCIModel implements the data model for JWT key service using a database schema before it was + changed to support the OCI specification. 
+ """ + def list_service_keys(self, service): + return data.model.service_keys.list_service_keys(service) + + def get_service_key(self, signer_kid, service=None, alive_only=True, approved_only=True): + try: + key = data.model.service_keys.get_service_key(signer_kid, service, alive_only, approved_only) + return _db_key_to_servicekey(key) + except data.model.ServiceKeyDoesNotExist: + raise ServiceKeyDoesNotExist() + + def create_service_key(self, name, kid, service, jwk, metadata, expiration_date, + rotation_duration=None): + key = data.model.service_keys.create_service_key(name, kid, service, jwk, metadata, + expiration_date, rotation_duration) + return _db_key_to_servicekey(key) + + def replace_service_key(self, old_kid, kid, jwk, metadata, expiration_date): + try: + data.model.service_keys.replace_service_key(old_kid, kid, jwk, metadata, expiration_date) + except data.model.ServiceKeyDoesNotExist: + raise ServiceKeyDoesNotExist() + + def delete_service_key(self, kid): + try: + key = data.model.service_keys.delete_service_key(kid) + return _db_key_to_servicekey(key) + except data.model.ServiceKeyDoesNotExist: + raise ServiceKeyDoesNotExist() + + +pre_oci_model = PreOCIModel() + + +def _db_key_to_servicekey(key): + """ + Converts the Pre-OCI database model of a service key into a ServiceKey. + """ + return ServiceKey( + name=key.name, + kid=key.kid, + service=key.service, + jwk=key.jwk, + metadata=key.metadata, + created_date=key.created_date, + expiration_date=key.expiration_date, + rotation_duration=key.rotation_duration, + approval=key.approval, + ) + diff --git a/data/interfaces/v1.py b/data/interfaces/v1.py new file mode 100644 index 000000000..8aeb2a008 --- /dev/null +++ b/data/interfaces/v1.py @@ -0,0 +1,418 @@ +from abc import ABCMeta, abstractmethod +from collections import namedtuple + +from six import add_metaclass + +from app import app, storage as store +from data import model +from data.model import db_transaction +from util.morecollections import AttrDict + + +class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', + 'is_public'])): + """ + Repository represents a namespaced collection of tags. + """ + + +@add_metaclass(ABCMeta) +class DockerRegistryV1DataInterface(object): + """ + Interface that represents all data store interactions required by a Docker Registry v1. + """ + + @abstractmethod + def placement_locations_docker_v1(self, namespace_name, repo_name, image_id): + """ + Returns all the placements for the image with the given V1 Docker ID, found under the given + repository or None if no image was found. + """ + pass + + @abstractmethod + def placement_locations_and_path_docker_v1(self, namespace_name, repo_name, image_id): + """ + Returns all the placements for the image with the given V1 Docker ID, found under the given + repository or None if no image was found. + """ + pass + + @abstractmethod + def docker_v1_metadata(self, namespace_name, repo_name, image_id): + """ + Returns various pieces of metadata associated with an image with the given V1 Docker ID, + including the checksum and its V1 JSON metadata. + """ + pass + + @abstractmethod + def update_docker_v1_metadata(self, namespace_name, repo_name, image_id, created_date_str, + comment, command, compat_json, parent_image_id=None): + """ + Updates various pieces of V1 metadata associated with a particular image. 
+ """ + pass + + @abstractmethod + def storage_exists(self, namespace_name, repo_name, image_id): + """ + Returns whether storage already exists for the image with the V1 Docker ID under the given + repository. + """ + pass + + @abstractmethod + def store_docker_v1_checksums(self, namespace_name, repo_name, image_id, checksum, + content_checksum): + """ + Stores the various V1 checksums for the image with the V1 Docker ID. + """ + pass + + @abstractmethod + def is_image_uploading(self, namespace_name, repo_name, image_id): + """ + Returns whether the image with the V1 Docker ID is currently marked as uploading. + """ + pass + + @abstractmethod + def update_image_uploading(self, namespace_name, repo_name, image_id, is_uploading): + """ + Marks the image with the V1 Docker ID with the given uploading status. + """ + pass + + @abstractmethod + def update_image_sizes(self, namespace_name, repo_name, image_id, size, uncompressed_size): + """ + Updates the sizing information for the image with the given V1 Docker ID. + """ + pass + + @abstractmethod + def get_image_size(self, namespace_name, repo_name, image_id): + """ + Returns the wire size of the image with the given Docker V1 ID. + """ + pass + + @abstractmethod + def create_bittorrent_pieces(self, namespace_name, repo_name, image_id, pieces_bytes): + """ + Saves the BitTorrent piece hashes for the image with the given Docker V1 ID. + """ + pass + + @abstractmethod + def image_ancestry(self, namespace_name, repo_name, image_id): + """ + Returns a list containing the full ancestry of Docker V1 IDs, in order, for the image with the + given Docker V1 ID. + """ + pass + + @abstractmethod + def repository_exists(self, namespace_name, repo_name): + """ + Returns whether the repository with the given name and namespace exists. + """ + pass + + @abstractmethod + def create_or_link_image(self, username, namespace_name, repo_name, image_id, storage_location): + """ + Adds the given image to the given repository, by either linking to an existing image visible to + the user with the given username, or creating a new one if no existing image matches. + """ + pass + + @abstractmethod + def create_temp_hidden_tag(self, namespace_name, repo_name, image_id, expiration): + """ + Creates a hidden tag under the matching namespace pointing to the image with the given V1 Docker + ID. + """ + pass + + @abstractmethod + def list_tags(self, namespace_name, repo_name): + """ + Returns all the tags defined in the repository with the given namespace and name. + """ + pass + + @abstractmethod + def create_or_update_tag(self, namespace_name, repo_name, image_id, tag_name): + """ + Creates or updates a tag under the matching repository to point to the image with the given + Docker V1 ID. + """ + pass + + @abstractmethod + def find_image_id_by_tag(self, namespace_name, repo_name, tag_name): + """ + Returns the Docker V1 image ID for the HEAD image for the tag with the given name under the + matching repository, or None if none. + """ + pass + + @abstractmethod + def delete_tag(self, namespace_name, repo_name, tag_name): + """ + Deletes the given tag from the given repository. + """ + pass + + @abstractmethod + def load_token(self, token): + """ + Loads the data associated with the given (deprecated) access token, and, if + found returns True. + """ + pass + + @abstractmethod + def verify_robot(self, username, token): + """ + Returns True if the given robot username and token match an existing robot + account. 
+ """ + pass + + @abstractmethod + def change_user_password(self, user, new_password): + """ + Changes the password associated with the given user. + """ + pass + + @abstractmethod + def get_repository(self, namespace_name, repo_name): + """ + Returns the repository with the given name under the given namespace or None + if none. + """ + pass + + @abstractmethod + def create_repository(self, namespace_name, repo_name, user=None): + """ + Creates a new repository under the given namespace with the given name, for + the given user. + """ + pass + + @abstractmethod + def repository_is_public(self, namespace_name, repo_name): + """ + Returns whether the repository with the given name under the given namespace + is public. If no matching repository was found, returns False. + """ + pass + + @abstractmethod + def validate_oauth_token(self, token): + """ Returns whether the given OAuth token validates. """ + pass + + @abstractmethod + def get_sorted_matching_repositories(self, search_term, only_public, can_read, limit): + """ + Returns a sorted list of repositories matching the given search term. + can_read is a callback that will be invoked for each repository found, to + filter results to only those visible to the current user (if any). + """ + pass + + +class PreOCIModel(DockerRegistryV1DataInterface): + """ + PreOCIModel implements the data model for the v1 Docker Registry protocol using a database schema + before it was changed to support the OCI specification. + """ + def placement_locations_docker_v1(self, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return None + return repo_image.storage.locations + + def placement_locations_and_path_docker_v1(self, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) + if not repo_image or repo_image.storage is None: + return None, None + return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage) + + def docker_v1_metadata(self, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) + if repo_image is None: + return None + + return AttrDict({ + 'namespace_name': namespace_name, + 'repo_name': repo_name, + 'image_id': image_id, + 'checksum': repo_image.v1_checksum, + 'compat_json': repo_image.v1_json_metadata, + }) + + def update_docker_v1_metadata(self, namespace_name, repo_name, image_id, created_date_str, + comment, command, compat_json, parent_image_id=None): + parent_image = None + if parent_image_id is not None: + parent_image = model.image.get_repo_image(namespace_name, repo_name, parent_image_id) + + model.image.set_image_metadata(image_id, namespace_name, repo_name, created_date_str, comment, + command, compat_json, parent=parent_image) + + def storage_exists(self, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return False + + if repo_image.storage.uploading: + return False + + layer_path = model.storage.get_layer_path(repo_image.storage) + return store.exists(repo_image.storage.locations, layer_path) + + def store_docker_v1_checksums(self, namespace_name, repo_name, image_id, checksum, + content_checksum): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if 
repo_image is None or repo_image.storage is None: + return + + with db_transaction(): + repo_image.storage.content_checksum = content_checksum + repo_image.v1_checksum = checksum + repo_image.storage.save() + repo_image.save() + + def is_image_uploading(self, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return False + return repo_image.storage.uploading + + def update_image_uploading(self, namespace_name, repo_name, image_id, is_uploading): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + + repo_image.storage.uploading = is_uploading + repo_image.storage.save() + return repo_image.storage + + def update_image_sizes(self, namespace_name, repo_name, image_id, size, uncompressed_size): + model.storage.set_image_storage_metadata(image_id, namespace_name, repo_name, size, + uncompressed_size) + + def get_image_size(self, namespace_name, repo_name, image_id): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return None + return repo_image.storage.image_size + + def create_bittorrent_pieces(self, namespace_name, repo_name, image_id, pieces_bytes): + repo_image = model.image.get_repo_image_and_storage(namespace_name, repo_name, image_id) + if repo_image is None or repo_image.storage is None: + return + + model.storage.save_torrent_info(repo_image.storage, app.config['BITTORRENT_PIECE_SIZE'], + pieces_bytes) + + def image_ancestry(self, namespace_name, repo_name, image_id): + try: + image = model.image.get_image_by_id(namespace_name, repo_name, image_id) + except model.InvalidImageException: + return None + + parents = model.image.get_parent_images(namespace_name, repo_name, image) + ancestry_docker_ids = [image.docker_image_id] + ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) + return ancestry_docker_ids + + def repository_exists(self, namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + return repo is not None + + def create_or_link_image(self, username, namespace_name, repo_name, image_id, storage_location): + repo = model.repository.get_repository(namespace_name, repo_name) + model.image.find_create_or_link_image(image_id, repo, username, {}, storage_location) + + def create_temp_hidden_tag(self, namespace_name, repo_name, image_id, expiration): + repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) + if repo_image is None: + return + + repo = repo_image.repository + model.tag.create_temporary_hidden_tag(repo, repo_image, expiration) + + def list_tags(self, namespace_name, repo_name): + return model.tag.list_repository_tags(namespace_name, repo_name) + + def create_or_update_tag(self, namespace_name, repo_name, image_id, tag_name): + model.tag.create_or_update_tag(namespace_name, repo_name, tag_name, image_id) + + def find_image_id_by_tag(self, namespace_name, repo_name, tag_name): + try: + tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag_name) + except model.DataModelException: + return None + + return tag_image.docker_image_id + + def delete_tag(self, namespace_name, repo_name, tag_name): + model.tag.delete_tag(namespace_name, repo_name, tag_name) + + def load_token(self, token): + try: + model.token.load_token_data(token) + return True + 
except model.InvalidTokenException: + return False + + def verify_robot(self, username, token): + try: + return bool(model.user.verify_robot(username, token)) + except model.InvalidRobotException: + return False + + def change_user_password(self, user, new_password): + model.user.change_password(user, new_password) + + def get_repository(self, namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + return _repository_for_repo(repo) + + def create_repository(self, namespace_name, repo_name, user=None): + model.repository.create_repository(namespace_name, repo_name, user) + + def repository_is_public(self, namespace_name, repo_name): + return model.repository.repository_is_public(namespace_name, repo_name) + + def validate_oauth_token(self, token): + return bool(model.oauth.validate_access_token(token)) + + def get_sorted_matching_repositories(self, search_term, only_public, can_read, limit): + repos = model.repository.get_sorted_matching_repositories(search_term, only_public, can_read, + limit=limit) + return [_repository_for_repo(repo) for repo in repos] + + +def _repository_for_repo(repo): + """ Returns a Repository object representing the Pre-OCI data model instance of a repository. """ + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) + + +pre_oci_model = PreOCIModel() diff --git a/data/interfaces/v2.py b/data/interfaces/v2.py new file mode 100644 index 000000000..cb16334d6 --- /dev/null +++ b/data/interfaces/v2.py @@ -0,0 +1,545 @@ +from abc import ABCMeta, abstractmethod +from collections import namedtuple + +from namedlist import namedlist +from peewee import IntegrityError +from six import add_metaclass + +from data import model, database +from data.model import DataModelException +from image.docker.v1 import DockerV1Metadata + +_MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws" + + +class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description', + 'is_public'])): + """ + Repository represents a namespaced collection of tags. + """ + +class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])): + """ + ManifestJSON represents a Manifest of any format. + """ + + +class Tag(namedtuple('Tag', ['name', 'repository'])): + """ + Tag represents a user-facing alias for referencing a set of Manifests. + """ + + +class BlobUpload(namedlist('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count', + 'chunk_count', 'sha_state', 'location_name', + 'storage_metadata', 'piece_sha_state', 'piece_hashes', + 'repo_namespace_name', 'repo_name'])): + """ + BlobUpload represents the current state of a Blob being uploaded. + """ + + +class Blob(namedtuple('Blob', ['uuid', 'digest', 'size', 'locations'])): + """ + Blob represents an opaque binary blob saved to the storage system. + """ + + +class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])): + """ + RepositoryReference represents a reference to a Repository, without its full metadata. + """ + +class Label(namedtuple('Label', ['key', 'value', 'source_type', 'media_type'])): + """ + Label represents a key-value pair that describes a particular Manifest.
+ """ + + +@add_metaclass(ABCMeta) +class DockerRegistryV2DataInterface(object): + """ + Interface that represents all data store interactions required by a Docker Registry v1. + """ + + @abstractmethod + def create_repository(self, namespace_name, repo_name, creating_user=None): + """ + Creates a new repository under the specified namespace with the given name. The user supplied is + the user creating the repository, if any. + """ + pass + + @abstractmethod + def repository_is_public(self, namespace_name, repo_name): + """ + Returns true if the repository with the given name under the given namespace has public + visibility. + """ + pass + + @abstractmethod + def get_repository(self, namespace_name, repo_name): + """ + Returns a repository tuple for the repository with the given name under the given namespace. + Returns None if no such repository was found. + """ + pass + + @abstractmethod + def has_active_tag(self, namespace_name, repo_name, tag_name): + """ + Returns whether there is an active tag for the tag with the given name under the matching + repository, if any, or none if none. + """ + pass + + @abstractmethod + def get_manifest_by_tag(self, namespace_name, repo_name, tag_name): + """ + Returns the current manifest for the tag with the given name under the matching repository, if + any, or None if none. + """ + pass + + @abstractmethod + def get_manifest_by_digest(self, namespace_name, repo_name, digest): + """ + Returns the manifest matching the given digest under the matching repository, if any, or None if + none. + """ + pass + + @abstractmethod + def delete_manifest_by_digest(self, namespace_name, repo_name, digest): + """ + Deletes the manifest with the associated digest (if any) and returns all removed tags that + pointed to that manifest. If the manifest was not found, returns an empty list. + """ + pass + + @abstractmethod + def get_docker_v1_metadata_by_tag(self, namespace_name, repo_name, tag_name): + """ + Returns the Docker V1 metadata associated with the tag with the given name under the matching + repository, if any. If none, returns None. + """ + pass + + @abstractmethod + def get_docker_v1_metadata_by_image_id(self, namespace_name, repo_name, docker_image_ids): + """ + Returns a map of Docker V1 metadata for each given image ID, matched under the repository with + the given namespace and name. Returns an empty map if the matching repository was not found. + """ + pass + + @abstractmethod + def get_parents_docker_v1_metadata(self, namespace_name, repo_name, docker_image_id): + """ + Returns an ordered list containing the Docker V1 metadata for each parent of the image with the + given docker ID under the matching repository. Returns an empty list if the image was not found. + """ + pass + + @abstractmethod + def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest_digest, + manifest_bytes): + """ + Creates a new manifest with the given digest and byte data, and assigns the tag with the given + name under the matching repository to it. + """ + pass + + @abstractmethod + def synthesize_v1_image(self, repository, storage, image_id, created, comment, command, + compat_json, parent_image_id): + """ + Synthesizes a V1 image under the specified repository, pointing to the given storage and returns + the V1 metadata for the synthesized image. 
+ """ + pass + + @abstractmethod + def save_manifest(self, namespace_name, repo_name, tag_name, leaf_layer_docker_id, + manifest_digest, manifest_bytes): + """ + Saves a manifest pointing to the given leaf image, with the given manifest, under the matching + repository as a tag with the given name. + + Returns a boolean whether or not the tag was newly created or not. + """ + pass + + @abstractmethod + def repository_tags(self, namespace_name, repo_name, limit, offset): + """ + Returns the active tags under the repository with the given name and namespace. + """ + pass + + @abstractmethod + def get_visible_repositories(self, username, limit, offset): + """ + Returns the repositories visible to the user with the given username, if any. + """ + pass + + @abstractmethod + def create_blob_upload(self, namespace_name, repo_name, upload_uuid, location_name, storage_metadata): + """ + Creates a blob upload under the matching repository with the given UUID and metadata. + Returns whether the matching repository exists. + """ + pass + + @abstractmethod + def blob_upload_by_uuid(self, namespace_name, repo_name, upload_uuid): + """ + Searches for a blob upload with the given UUID under the given repository and returns it or None + if none. + """ + pass + + @abstractmethod + def update_blob_upload(self, blob_upload): + """ + Saves any changes to the blob upload object given to the backing data store. + Fields that can change: + - uncompressed_byte_count + - piece_hashes + - piece_sha_state + - storage_metadata + - byte_count + - chunk_count + - sha_state + """ + pass + + @abstractmethod + def delete_blob_upload(self, namespace_name, repo_name, uuid): + """ + Deletes the blob upload with the given uuid under the matching repository. If none, does + nothing. + """ + pass + + @abstractmethod + def create_blob_and_temp_tag(self, namespace_name, repo_name, blob_digest, blob_upload, + expiration_sec): + """ + Creates a blob and links a temporary tag with the specified expiration to it under the matching + repository. + """ + pass + + @abstractmethod + def get_blob_by_digest(self, namespace_name, repo_name, digest): + """ + Returns the blob with the given digest under the matching repository or None if none. + """ + pass + + @abstractmethod + def save_bittorrent_pieces(self, blob, piece_size, piece_bytes): + """ + Saves the BitTorrent piece hashes for the given blob. + """ + pass + + @abstractmethod + def create_manifest_labels(self, namespace_name, repo_name, manifest_digest, labels): + """ + Creates a new labels for the provided manifest. + """ + pass + + + @abstractmethod + def get_blob_path(self, blob): + """ + Once everything is moved over, this could be in util.registry and not even touch the database. + """ + pass + + +class PreOCIModel(DockerRegistryV2DataInterface): + """ + PreOCIModel implements the data model for the v2 Docker Registry protocol using a database schema + before it was changed to support the OCI specification. 
+ """ + def create_repository(self, namespace_name, repo_name, creating_user=None): + return model.repository.create_repository(namespace_name, repo_name, creating_user) + + def repository_is_public(self, namespace_name, repo_name): + return model.repository.repository_is_public(namespace_name, repo_name) + + def get_repository(self, namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + return _repository_for_repo(repo) + + def has_active_tag(self, namespace_name, repo_name, tag_name): + try: + model.tag.get_active_tag(namespace_name, repo_name, tag_name) + return True + except database.RepositoryTag.DoesNotExist: + return False + + def get_manifest_by_tag(self, namespace_name, repo_name, tag_name): + try: + manifest = model.tag.load_tag_manifest(namespace_name, repo_name, tag_name) + return ManifestJSON(digest=manifest.digest, json=manifest.json_data, media_type=_MEDIA_TYPE) + except model.InvalidManifestException: + return None + + def get_manifest_by_digest(self, namespace_name, repo_name, digest): + try: + manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, digest) + return ManifestJSON(digest=digest, json=manifest.json_data, media_type=_MEDIA_TYPE) + except model.InvalidManifestException: + return None + + def delete_manifest_by_digest(self, namespace_name, repo_name, digest): + def _tag_view(tag): + return Tag( + name=tag.name, + repository=RepositoryReference( + id=tag.repository_id, + name=repo_name, + namespace_name=namespace_name, + ) + ) + + tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, digest) + return [_tag_view(tag) for tag in tags] + + def get_docker_v1_metadata_by_tag(self, namespace_name, repo_name, tag_name): + try: + repo_img = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) + return _docker_v1_metadata(namespace_name, repo_name, repo_img) + except DataModelException: + return None + + def get_docker_v1_metadata_by_image_id(self, namespace_name, repo_name, docker_image_ids): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return {} + + images_query = model.image.lookup_repository_images(repo, docker_image_ids) + return {image.docker_image_id: _docker_v1_metadata(namespace_name, repo_name, image) + for image in images_query} + + def get_parents_docker_v1_metadata(self, namespace_name, repo_name, docker_image_id): + repo_image = model.image.get_repo_image(namespace_name, repo_name, docker_image_id) + if repo_image is None: + return [] + + parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) + return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents] + + def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest_digest, + manifest_bytes): + try: + model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, + manifest_digest, manifest_bytes) + except IntegrityError: + # It's already there! 
+ pass + + def synthesize_v1_image(self, repository, storage, image_id, created, comment, command, + compat_json, parent_image_id): + repo = model.repository.get_repository(repository.namespace_name, repository.name) + if repo is None: + raise DataModelException('Unknown repository: %s/%s' % (repository.namespace_name, + repository.name)) + + parent_image = None + if parent_image_id is not None: + parent_image = model.image.get_image(repo, parent_image_id) + if parent_image is None: + raise DataModelException('Unknown parent image: %s' % parent_image_id) + + storage_obj = model.storage.get_storage_by_uuid(storage.uuid) + if storage_obj is None: + raise DataModelException('Unknown storage: %s' % storage.uuid) + + repo_image = model.image.synthesize_v1_image(repo, storage_obj, image_id, created, comment, + command, compat_json, parent_image) + return _docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image) + + def save_manifest(self, namespace_name, repo_name, tag_name, leaf_layer_docker_id, + manifest_digest, manifest_bytes): + (_, newly_created) = model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, + leaf_layer_docker_id, manifest_digest, + manifest_bytes) + return newly_created + + def repository_tags(self, namespace_name, repo_name, limit, offset): + def _tag_view(tag): + return Tag( + name=tag.name, + repository=RepositoryReference( + id=tag.repository_id, + name=repo_name, + namespace_name=namespace_name, + ) + ) + + tags_query = model.tag.list_repository_tags(namespace_name, repo_name) + tags_query = tags_query.limit(limit).offset(offset) + return [_tag_view(tag) for tag in tags_query] + + def get_visible_repositories(self, username, limit, offset): + query = model.repository.get_visible_repositories(username, include_public=(username is None)) + query = query.limit(limit).offset(offset) + return [_repository_for_repo(repo) for repo in query] + + def create_blob_upload(self, namespace_name, repo_name, upload_uuid, location_name, storage_metadata): + try: + model.blob.initiate_upload(namespace_name, repo_name, upload_uuid, location_name, + storage_metadata) + return True + except database.Repository.DoesNotExist: + return False + + def blob_upload_by_uuid(self, namespace_name, repo_name, upload_uuid): + try: + found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) + except model.InvalidBlobUpload: + return None + + return BlobUpload( + repo_namespace_name=namespace_name, + repo_name=repo_name, + uuid=upload_uuid, + byte_count=found.byte_count, + uncompressed_byte_count=found.uncompressed_byte_count, + chunk_count=found.chunk_count, + sha_state=found.sha_state, + piece_sha_state=found.piece_sha_state, + piece_hashes=found.piece_hashes, + location_name=found.location.name, + storage_metadata=found.storage_metadata, + ) + + def update_blob_upload(self, blob_upload): + # Lookup the blob upload object. 
+ try: + blob_upload_record = model.blob.get_blob_upload(blob_upload.repo_namespace_name, + blob_upload.repo_name, blob_upload.uuid) + except model.InvalidBlobUpload: + return + + blob_upload_record.uncompressed_byte_count = blob_upload.uncompressed_byte_count + blob_upload_record.piece_hashes = blob_upload.piece_hashes + blob_upload_record.piece_sha_state = blob_upload.piece_sha_state + blob_upload_record.storage_metadata = blob_upload.storage_metadata + blob_upload_record.byte_count = blob_upload.byte_count + blob_upload_record.chunk_count = blob_upload.chunk_count + blob_upload_record.sha_state = blob_upload.sha_state + blob_upload_record.save() + + def delete_blob_upload(self, namespace_name, repo_name, uuid): + try: + found = model.blob.get_blob_upload(namespace_name, repo_name, uuid) + found.delete_instance() + except model.InvalidBlobUpload: + return + + def create_blob_and_temp_tag(self, namespace_name, repo_name, blob_digest, blob_upload, + expiration_sec): + location_obj = model.storage.get_image_location_for_name(blob_upload.location_name) + blob_record = model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, + blob_digest, location_obj.id, + blob_upload.byte_count, + expiration_sec, + blob_upload.uncompressed_byte_count) + return Blob( + uuid=blob_record.uuid, + digest=blob_digest, + size=blob_upload.byte_count, + locations=[blob_upload.location_name], + ) + + def lookup_blobs_by_digest(self, namespace_name, repo_name, digests): + def _blob_view(blob_record): + return Blob( + uuid=blob_record.uuid, + digest=blob_record.content_checksum, + size=blob_record.image_size, + locations=None, # Note: Locations is None in this case. + ) + + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return {} + query = model.storage.lookup_repo_storages_by_content_checksum(repo, digests) + return {storage.content_checksum: _blob_view(storage) for storage in query} + + def get_blob_by_digest(self, namespace_name, repo_name, digest): + try: + blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) + return Blob( + uuid=blob_record.uuid, + digest=digest, + size=blob_record.image_size, + locations=blob_record.locations, + ) + except model.BlobDoesNotExist: + return None + + def save_bittorrent_pieces(self, blob, piece_size, piece_bytes): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + model.storage.save_torrent_info(blob_record, piece_size, piece_bytes) + + def create_manifest_labels(self, namespace_name, repo_name, manifest_digest, labels): + if not labels: + # No point in doing anything more. + return + + tag_manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_digest) + for label in labels: + model.label.create_manifest_label(tag_manifest, label.key, label.value, label.source_type, + label.media_type) + + def get_blob_path(self, blob): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + return model.storage.get_layer_path(blob_record) + + +def _docker_v1_metadata(namespace_name, repo_name, repo_image): + """ + Returns a DockerV1Metadata object for the given Pre-OCI repo_image under the + repository with the given namespace and name. Note that the namespace and + name are passed here as an optimization, and are *not checked* against the + image. 
+ """ + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=repo_image.docker_image_id, + checksum=repo_image.v1_checksum, + content_checksum=repo_image.storage.content_checksum, + compat_json=repo_image.v1_json_metadata, + created=repo_image.created, + comment=repo_image.comment, + command=repo_image.command, + # TODO: make sure this isn't needed anywhere, as it is expensive to lookup + parent_image_id=None, + ) + + +def _repository_for_repo(repo): + """ Returns a Repository object representing the Pre-OCI data model repo instance given. """ + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + description=repo.description, + is_public=model.repository.is_repository_public(repo) + ) + + +pre_oci_model = PreOCIModel() diff --git a/data/interfaces/verbs.py b/data/interfaces/verbs.py new file mode 100644 index 000000000..f5758352e --- /dev/null +++ b/data/interfaces/verbs.py @@ -0,0 +1,322 @@ +import json + +from abc import ABCMeta, abstractmethod +from collections import namedtuple + +from six import add_metaclass + +from data import model +from image.docker.v1 import DockerV1Metadata + + +class DerivedImage(namedtuple('DerivedImage', ['ref', 'blob', 'internal_source_image_db_id'])): + """ + DerivedImage represents a user-facing alias for an image which was derived from another image. + """ + +class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])): + """ + RepositoryReference represents a reference to a Repository, without its full metadata. + """ + +class ImageWithBlob(namedtuple('Image', ['image_id', 'blob', 'compat_metadata', 'repository', + 'internal_db_id', 'v1_metadata'])): + """ + ImageWithBlob represents a user-facing alias for referencing an image, along with its blob. + """ + +class Blob(namedtuple('Blob', ['uuid', 'size', 'uncompressed_size', 'uploading', 'locations'])): + """ + Blob represents an opaque binary blob saved to the storage system. + """ + +class TorrentInfo(namedtuple('TorrentInfo', ['piece_length', 'pieces'])): + """ + TorrentInfo represents the torrent piece information associated with a blob. + """ + + +@add_metaclass(ABCMeta) +class VerbsDataInterface(object): + """ + Interface that represents all data store interactions required by the registry's custom HTTP + verbs. + """ + @abstractmethod + def repository_is_public(self, namespace_name, repo_name): + """ + Returns a boolean for whether the repository with the given name and namespace is public. + """ + pass + + @abstractmethod + def get_manifest_layers_with_blobs(self, repo_image): + """ + Returns the full set of manifest layers and their associated blobs starting at the given + repository image and working upwards to the root image. + """ + pass + + @abstractmethod + def get_blob_path(self, blob): + """ + Returns the storage path for the given blob. + """ + pass + + @abstractmethod + def get_derived_image_signature(self, derived_image, signer_name): + """ + Returns the signature associated with the derived image and a specific signer or None if none. + """ + pass + + @abstractmethod + def set_derived_image_signature(self, derived_image, signer_name, signature): + """ + Sets the calculated signature for the given derived image and signer to that specified. + """ + pass + + @abstractmethod + def delete_derived_image(self, derived_image): + """ + Deletes a derived image and all of its storage. 
+ """ + pass + + @abstractmethod + def set_blob_size(self, blob, size): + """ + Sets the size field on a blob to the value specified. + """ + pass + + @abstractmethod + def get_repo_blob_by_digest(self, namespace_name, repo_name, digest): + """ + Returns the blob with the given digest under the matching repository or None if none. + """ + pass + + @abstractmethod + def get_torrent_info(self, blob): + """ + Returns the torrent information associated with the given blob or None if none. + """ + pass + + @abstractmethod + def set_torrent_info(self, blob, piece_length, pieces): + """ + Sets the torrent infomation associated with the given blob to that specified. + """ + pass + + @abstractmethod + def lookup_derived_image(self, repo_image, verb, varying_metadata=None): + """ + Looks up the derived image for the given repository image, verb and optional varying metadata + and returns it or None if none. + """ + pass + + @abstractmethod + def lookup_or_create_derived_image(self, repo_image, verb, location, varying_metadata=None): + """ + Looks up the derived image for the given repository image, verb and optional varying metadata + and returns it. If none exists, a new derived image is created. + """ + pass + + @abstractmethod + def get_tag_image(self, namespace_name, repo_name, tag_name): + """ + Returns the image associated with the live tag with the given name under the matching repository + or None if none. + """ + pass + + +class PreOCIModel(VerbsDataInterface): + """ + PreOCIModel implements the data model for the registry's custom HTTP verbs using a database schema + before it was changed to support the OCI specification. + """ + + def repository_is_public(self, namespace_name, repo_name): + return model.repository.repository_is_public(namespace_name, repo_name) + + def get_manifest_layers_with_blobs(self, repo_image): + repo_image_record = model.image.get_image_by_id(repo_image.repository.namespace_name, + repo_image.repository.name, + repo_image.image_id) + + parents = model.image.get_parent_images_with_placements(repo_image.repository.namespace_name, + repo_image.repository.name, + repo_image_record) + + yield repo_image + + for parent in parents: + metadata = {} + try: + metadata = json.loads(parent.v1_json_metadata) + except ValueError: + pass + + yield ImageWithBlob( + image_id=parent.docker_image_id, + blob=_blob(parent.storage), + repository=repo_image.repository, + compat_metadata=metadata, + v1_metadata=_docker_v1_metadata(repo_image.repository.namespace_name, + repo_image.repository.name, parent), + internal_db_id=parent.id, + ) + + def get_derived_image_signature(self, derived_image, signer_name): + storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) + signature_entry = model.storage.lookup_storage_signature(storage, signer_name) + if signature_entry is None: + return None + + return signature_entry.signature + + def set_derived_image_signature(self, derived_image, signer_name, signature): + storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid) + signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name) + signature_entry.signature = signature + signature_entry.uploading = False + signature_entry.save() + + def delete_derived_image(self, derived_image): + model.image.delete_derived_storage_by_uuid(derived_image.blob.uuid) + + def set_blob_size(self, blob, size): + storage_entry = model.storage.get_storage_by_uuid(blob.uuid) + storage_entry.image_size = size + storage_entry.uploading = False + storage_entry.save() + + 
def get_blob_path(self, blob): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + return model.storage.get_layer_path(blob_record) + + def get_repo_blob_by_digest(self, namespace_name, repo_name, digest): + try: + blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) + except model.BlobDoesNotExist: + return None + + return _blob(blob_record) + + def get_torrent_info(self, blob): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + + try: + torrent_info = model.storage.get_torrent_info(blob_record) + except model.TorrentInfoDoesNotExist: + return None + + return TorrentInfo( + pieces=torrent_info.pieces, + piece_length=torrent_info.piece_length, + ) + + def set_torrent_info(self, blob, piece_length, pieces): + blob_record = model.storage.get_storage_by_uuid(blob.uuid) + model.storage.save_torrent_info(blob_record, piece_length, pieces) + + def lookup_derived_image(self, repo_image, verb, varying_metadata=None): + blob_record = model.image.find_derived_storage_for_image(repo_image.internal_db_id, verb, + varying_metadata) + if blob_record is None: + return None + + return _derived_image(blob_record, repo_image) + + def lookup_or_create_derived_image(self, repo_image, verb, location, varying_metadata=None): + blob_record = model.image.find_or_create_derived_storage(repo_image.internal_db_id, verb, + location, varying_metadata) + return _derived_image(blob_record, repo_image) + + def get_tag_image(self, namespace_name, repo_name, tag_name): + try: + found = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) + except model.DataModelException: + return None + + metadata = {} + try: + metadata = json.loads(found.v1_json_metadata) + except ValueError: + pass + + return ImageWithBlob( + image_id=found.docker_image_id, + blob=_blob(found.storage), + repository=RepositoryReference( + namespace_name=namespace_name, + name=repo_name, + id=found.repository_id, + ), + compat_metadata=metadata, + v1_metadata=_docker_v1_metadata(namespace_name, repo_name, found), + internal_db_id=found.id, + ) + + +pre_oci_model = PreOCIModel() + + +def _docker_v1_metadata(namespace_name, repo_name, repo_image): + """ + Returns a DockerV1Metadata object for the given Pre-OCI repo_image under the + repository with the given namespace and name. Note that the namespace and + name are passed here as an optimization, and are *not checked* against the + image. + """ + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=repo_image.docker_image_id, + checksum=repo_image.v1_checksum, + compat_json=repo_image.v1_json_metadata, + created=repo_image.created, + comment=repo_image.comment, + command=repo_image.command, + + # Note: These are not needed in verbs and are expensive to load, so we just skip them. + content_checksum=None, + parent_image_id=None, + ) + + +def _derived_image(blob_record, repo_image): + """ + Returns a DerivedImage object for the given Pre-OCI data model blob and repo_image instance. + """ + return DerivedImage( + ref=repo_image.internal_db_id, + blob=_blob(blob_record), + internal_source_image_db_id=repo_image.internal_db_id, + ) + + +def _blob(blob_record): + """ + Returns a Blob object for the given Pre-OCI data model blob instance. 
+ """ + if hasattr(blob_record, 'locations'): + locations = blob_record.locations + else: + locations = model.storage.get_storage_locations(blob_record.uuid) + + return Blob( + uuid=blob_record.uuid, + size=blob_record.image_size, + uncompressed_size=blob_record.uncompressed_size, + uploading=blob_record.uploading, + locations=locations, + ) diff --git a/data/model/blob.py b/data/model/blob.py index 2584d96f2..470bb519d 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -3,7 +3,7 @@ from uuid import uuid4 from data.model import (tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction, storage as storage_model, InvalidImageException) from data.database import (Repository, Namespace, ImageStorage, Image, ImageStoragePlacement, - BlobUpload) + BlobUpload, ImageStorageLocation) def get_repo_blob_by_digest(namespace, repo_name, blob_digest): @@ -63,7 +63,9 @@ def get_blob_upload(namespace, repo_name, upload_uuid): """ try: return (BlobUpload - .select() + .select(BlobUpload, ImageStorageLocation) + .join(ImageStorageLocation) + .switch(BlobUpload) .join(Repository) .join(Namespace, on=(Namespace.id == Repository.namespace_user)) .where(Repository.name == repo_name, Namespace.username == namespace, diff --git a/data/model/image.py b/data/model/image.py index 031f4a660..2fb46842c 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -99,6 +99,17 @@ def get_repo_image_extended(namespace_name, repository_name, docker_image_id): return images[0] +def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id): + def limit_to_image_id(query): + return query.where(Image.docker_image_id == docker_image_id) + + images = _get_repository_images_and_storages(namespace_name, repository_name, limit_to_image_id) + if not images: + return None + + return images[0] + + def _get_repository_images_and_storages(namespace_name, repository_name, query_modifier): query = (Image .select(Image, ImageStorage) @@ -502,7 +513,6 @@ def find_or_create_derived_storage(source_image, transformation_name, preferred_ if existing is not None: return existing - logger.debug('Creating storage dervied from source image: %s', source_image.id) uniqueness_hash = _get_uniqueness_hash(varying_metadata) trans = ImageStorageTransformation.get(name=transformation_name) new_storage = storage.create_v1_storage(preferred_location) diff --git a/data/model/label.py b/data/model/label.py index ad5eadc7d..bd783e168 100644 --- a/data/model/label.py +++ b/data/model/label.py @@ -1,11 +1,12 @@ import logging +from cachetools import lru_cache + from data.database import Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException from data.model._basequery import prefix_search from util.validation import validate_label_key from util.validation import is_json -from cachetools import lru_cache logger = logging.getLogger(__name__) diff --git a/data/model/tag.py b/data/model/tag.py index 03fa30ec3..10537d071 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -333,6 +333,16 @@ def load_tag_manifest(namespace, repo_name, tag_name): raise InvalidManifestException(msg) +def delete_manifest_by_digest(namespace, repo_name, digest): + tag_manifests = list(_load_repo_manifests(namespace, repo_name) + .where(TagManifest.digest == digest)) + + for tag_manifest in tag_manifests: + delete_tag(namespace, repo_name, tag_manifest.tag.name) + + return [tag_manifest.tag for tag_manifest in tag_manifests] + + def 
load_manifest_by_digest(namespace, repo_name, digest): try: return (_load_repo_manifests(namespace, repo_name) @@ -344,9 +354,10 @@ def load_manifest_by_digest(namespace, repo_name, digest): def _load_repo_manifests(namespace, repo_name): - return _tag_alive(TagManifest - .select(TagManifest, RepositoryTag, Repository) - .join(RepositoryTag) - .join(Repository) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)) - .where(Repository.name == repo_name, Namespace.username == namespace)) + return _tag_alive(TagManifest + .select(TagManifest, RepositoryTag) + .join(RepositoryTag) + .join(Image) + .join(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) + .where(Repository.name == repo_name, Namespace.username == namespace)) diff --git a/endpoints/building.py b/endpoints/building.py index 93961bfc8..977a964a3 100644 --- a/endpoints/building.py +++ b/endpoints/building.py @@ -9,7 +9,7 @@ from data.database import db from auth.auth_context import get_authenticated_user from endpoints.notificationhelper import spawn_notification from util.names import escape_tag - +from util.morecollections import AttrDict logger = logging.getLogger(__name__) @@ -72,7 +72,13 @@ def start_build(repository, prepared_build, pull_robot_name=None): model.log.log_action('build_dockerfile', repository.namespace_user.username, ip=request.remote_addr, metadata=event_log_metadata, repository=repository) - spawn_notification(repository, 'build_queued', event_log_metadata, + # TODO(jzelinskie): remove when more endpoints have been converted to using interfaces + repo = AttrDict({ + 'namespace_name': repository.namespace_user.username, + 'name': repository.name, + }) + + spawn_notification(repo, 'build_queued', event_log_metadata, subpage='build/%s' % build_request.uuid, pathargs=['build', build_request.uuid]) diff --git a/endpoints/key_server.py b/endpoints/keyserver/__init__.py similarity index 84% rename from endpoints/key_server.py rename to endpoints/keyserver/__init__.py index 70c0da0eb..7cab60b40 100644 --- a/endpoints/key_server.py +++ b/endpoints/keyserver/__init__.py @@ -4,11 +4,9 @@ from datetime import datetime, timedelta from flask import Blueprint, jsonify, abort, request, make_response from jwt import get_unverified_header -import data.model -import data.model.service_keys -from data.model.log import log_action - from app import app +from data.interfaces.key_server import pre_oci_model as model, ServiceKeyDoesNotExist +from data.model.log import log_action from util.security import jwtutil @@ -38,7 +36,7 @@ def _validate_jwt(encoded_jwt, jwk, service): try: jwtutil.decode(encoded_jwt, public_key, algorithms=['RS256'], - audience=JWT_AUDIENCE, issuer=service) + audience=JWT_AUDIENCE, issuer=service) except jwtutil.InvalidTokenError: logger.exception('JWT validation failure') abort(400) @@ -55,23 +53,22 @@ def _signer_kid(encoded_jwt, allow_none=False): def _lookup_service_key(service, signer_kid, approved_only=True): try: - return data.model.service_keys.get_service_key(signer_kid, service=service, - approved_only=approved_only) - except data.model.ServiceKeyDoesNotExist: + return model.get_service_key(signer_kid, service=service, approved_only=approved_only) + except ServiceKeyDoesNotExist: abort(403) @key_server.route('/services//keys', methods=['GET']) def list_service_keys(service): - keys = data.model.service_keys.list_service_keys(service) + keys = model.list_service_keys(service) return jsonify({'keys': [key.jwk for key in keys]}) 
@key_server.route('/services//keys/', methods=['GET']) def get_service_key(service, kid): try: - key = data.model.service_keys.get_service_key(kid, alive_only=False, approved_only=False) - except data.model.ServiceKeyDoesNotExist: + key = model.get_service_key(kid, alive_only=False, approved_only=False) + except ServiceKeyDoesNotExist: abort(404) if key.approval is None: @@ -119,8 +116,8 @@ def put_service_key(service, kid): if kid == signer_kid or signer_kid is None: # The key is self-signed. Create a new instance and await approval. _validate_jwt(encoded_jwt, jwk, service) - data.model.service_keys.create_service_key('', kid, service, jwk, metadata, expiration_date, - rotation_duration=rotation_duration) + model.create_service_key('', kid, service, jwk, metadata, expiration_date, + rotation_duration=rotation_duration) key_log_metadata = { 'kid': kid, @@ -143,8 +140,8 @@ def put_service_key(service, kid): _validate_jwt(encoded_jwt, signer_jwk, service) try: - data.model.service_keys.replace_service_key(signer_key.kid, kid, jwk, metadata, expiration_date) - except data.model.ServiceKeyDoesNotExist: + model.replace_service_key(signer_key.kid, kid, jwk, metadata, expiration_date) + except ServiceKeyDoesNotExist: abort(404) key_log_metadata = { @@ -180,8 +177,8 @@ def delete_service_key(service, kid): _validate_jwt(encoded_jwt, signer_key.jwk, service) try: - data.model.service_keys.delete_service_key(kid) - except data.model.ServiceKeyDoesNotExist: + model.delete_service_key(kid) + except ServiceKeyDoesNotExist: abort(404) key_log_metadata = { diff --git a/endpoints/notificationhelper.py b/endpoints/notificationhelper.py index b5a71c574..8640d741d 100644 --- a/endpoints/notificationhelper.py +++ b/endpoints/notificationhelper.py @@ -5,7 +5,7 @@ from auth.auth_context import get_authenticated_user, get_validated_oauth_token import json def build_event_data(repo, extra_data={}, subpage=None): - repo_string = '%s/%s' % (repo.namespace_user.username, repo.name) + repo_string = '%s/%s' % (repo.namespace_name, repo.name) homepage = '%s://%s/repository/%s' % (app.config['PREFERRED_URL_SCHEME'], app.config['SERVER_HOSTNAME'], repo_string) @@ -18,11 +18,10 @@ def build_event_data(repo, extra_data={}, subpage=None): event_data = { 'repository': repo_string, - 'namespace': repo.namespace_user.username, + 'namespace': repo.namespace_name, 'name': repo.name, 'docker_url': '%s/%s' % (app.config['SERVER_HOSTNAME'], repo_string), 'homepage': homepage, - 'visibility': repo.visibility.name } event_data.update(extra_data) @@ -54,10 +53,10 @@ def spawn_notification(repo, event_name, extra_data={}, subpage=None, pathargs=[ performer_data=None): event_data = build_event_data(repo, extra_data=extra_data, subpage=subpage) - notifications = model.notification.list_repo_notifications(repo.namespace_user.username, + notifications = model.notification.list_repo_notifications(repo.namespace_name, repo.name, event_name=event_name) for notification in list(notifications): notification_data = build_notification_data(notification, event_data, performer_data) - path = [repo.namespace_user.username, repo.name, event_name] + pathargs + path = [repo.namespace_name, repo.name, event_name] + pathargs notification_queue.put(path, json.dumps(notification_data)) diff --git a/endpoints/trackhelper.py b/endpoints/trackhelper.py index 1905ac86d..f1bc708e5 100644 --- a/endpoints/trackhelper.py +++ b/endpoints/trackhelper.py @@ -3,21 +3,22 @@ import random from urlparse import urlparse -from app import analytics, app, userevents -from 
data import model from flask import request + +from app import analytics, userevents +from data import model from auth.registry_jwt_auth import get_granted_entity from auth.auth_context import (get_authenticated_user, get_validated_token, get_validated_oauth_token) logger = logging.getLogger(__name__) -def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **kwargs): - repository = repo.name - namespace = repo.namespace_user.username +def track_and_log(event_name, repo_obj, analytics_name=None, analytics_sample=1, **kwargs): + repo_name = repo_obj.name + namespace_name = repo_obj.namespace_name, metadata = { - 'repo': repository, - 'namespace': namespace, + 'repo': repo_name, + 'namespace': namespace_name, } metadata.update(kwargs) @@ -57,7 +58,7 @@ def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **k request_parsed = urlparse(request.url_root) extra_params = { - 'repository': '%s/%s' % (namespace, repository), + 'repository': '%s/%s' % (namespace_name, repo_name), 'user-agent': request.user_agent.string, 'hostname': request_parsed.hostname, } @@ -68,8 +69,8 @@ def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **k logger.debug('Publishing %s to the user events system', event_name) user_event_data = { 'action': event_name, - 'repository': repository, - 'namespace': namespace + 'repository': repo_name, + 'namespace': namespace_name, } event = userevents.get_event(authenticated_user.username) @@ -84,7 +85,6 @@ def track_and_log(event_name, repo, analytics_name=None, analytics_sample=1, **k # Log the action to the database. logger.debug('Logging the %s to logs system', event_name) - model.log.log_action(event_name, namespace, performer=authenticated_user, ip=request.remote_addr, - metadata=metadata, repository=repo) - + model.log.log_action(event_name, namespace_name, performer=authenticated_user, + ip=request.remote_addr, metadata=metadata, repository=repo_obj) logger.debug('Track and log of %s complete', event_name) diff --git a/endpoints/v1/index.py b/endpoints/v1/index.py index 82d26837e..454b95c78 100644 --- a/endpoints/v1/index.py +++ b/endpoints/v1/index.py @@ -6,7 +6,7 @@ from functools import wraps from flask import request, make_response, jsonify, session -from data import model +from data.interfaces.v1 import pre_oci_model as model from app import authentication, userevents, metric_queue from auth.auth import process_auth, generate_signed_token from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token @@ -85,26 +85,19 @@ def create_user(): success = make_response('"Username or email already exists"', 400) if username == '$token': - try: - model.token.load_token_data(password) + if model.load_token(password): return success - except model.InvalidTokenException: - abort(400, 'Invalid access token.', issue='invalid-access-token') + abort(400, 'Invalid access token.', issue='invalid-access-token') elif username == '$oauthtoken': - validated = model.oauth.validate_access_token(password) - if validated is not None: + if model.validate_oauth_token(password): return success - else: - abort(400, 'Invalid oauth access token.', issue='invalid-oauth-access-token') + abort(400, 'Invalid oauth access token.', issue='invalid-oauth-access-token') elif '+' in username: - try: - model.user.verify_robot(username, password) + if model.verify_robot(username, password): return success - except model.InvalidRobotException: - abort(400, 'Invalid robot account or password.', - 
issue='robot-login-failure') + abort(400, 'Invalid robot account or password.', issue='robot-login-failure') (verified, error_message) = authentication.verify_and_link_user(username, password, basic_auth=True) @@ -148,23 +141,17 @@ def get_user(): @anon_allowed def update_user(username): permission = UserAdminPermission(username) - if permission.can(): update_request = request.get_json() if 'password' in update_request: logger.debug('Updating user password') - model.user.change_password(get_authenticated_user(), update_request['password']) - - if 'email' in update_request: - logger.debug('Updating user email') - model.user.update_email(get_authenticated_user(), update_request['email']) + model.change_user_password(get_authenticated_user(), update_request['password']) return jsonify({ 'username': get_authenticated_user().username, - 'email': get_authenticated_user().email, + 'email': get_authenticated_user().email }) - abort(403) @@ -179,7 +166,7 @@ def create_repository(namespace_name, repo_name): abort(400, message='Invalid repository name. Repository names cannot contain slashes.') logger.debug('Looking up repository %s/%s', namespace_name, repo_name) - repo = model.repository.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) logger.debug('Found repository %s/%s', namespace_name, repo_name) if not repo and get_authenticated_user() is None: @@ -189,15 +176,15 @@ def create_repository(namespace_name, repo_name): issue='no-login') elif repo: - permission = ModifyRepositoryPermission(namespace_name, repo_name) - if not permission.can(): + modify_perm = ModifyRepositoryPermission(namespace_name, repo_name) + if not modify_perm.can(): abort(403, message='You do not have permission to modify repository %(namespace)s/%(repository)s', issue='no-repo-write-permission', namespace=namespace_name, repository=repo_name) else: - permission = CreateRepositoryPermission(namespace_name) - if not permission.can(): + create_perm = CreateRepositoryPermission(namespace_name) + if not create_perm.can(): logger.info('Attempt to create a new repo %s/%s with insufficient perms', namespace_name, repo_name) msg = 'You do not have permission to create repositories in namespace "%(namespace)s"' @@ -207,7 +194,7 @@ def create_repository(namespace_name, repo_name): logger.debug('Creating repository %s/%s with owner: %s', namespace_name, repo_name, get_authenticated_user().username) - repo = model.repository.create_repository(namespace_name, repo_name, get_authenticated_user()) + model.create_repository(namespace_name, repo_name, get_authenticated_user()) if get_authenticated_user(): user_event_data = { @@ -232,7 +219,7 @@ def update_images(namespace_name, repo_name): if permission.can(): logger.debug('Looking up repository') - repo = model.repository.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if not repo: # Make sure the repo actually exists. abort(404, message='Unknown repository', issue='unknown-repo') @@ -262,10 +249,10 @@ def get_repository_images(namespace_name, repo_name): permission = ReadRepositoryPermission(namespace_name, repo_name) # TODO invalidate token? 
- if permission.can() or model.repository.repository_is_public(namespace_name, repo_name): + if permission.can() or model.repository_is_public(namespace_name, repo_name): # We can't rely on permissions to tell us if a repo exists anymore logger.debug('Looking up repository') - repo = model.repository.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if not repo: abort(404, message='Unknown repository', issue='unknown-repo') @@ -296,27 +283,6 @@ def put_repository_auth(namespace_name, repo_name): abort(501, 'Not Implemented', issue='not-implemented') -def conduct_repo_search(username, query, results): - """ Finds matching repositories. """ - def can_read(repo): - if repo.is_public: - return True - - return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can() - - only_public = username is None - matching_repos = model.repository.get_sorted_matching_repositories(query, only_public, can_read, - limit=5) - - for repo in matching_repos: - results.append({ - 'name': repo.namespace_user.username + '/' + repo.name, - 'description': repo.description, - 'is_public': repo.is_public, - 'href': '/repository/' + repo.namespace_user.username + '/' + repo.name - }) - - @v1_bp.route('/search', methods=['GET']) @process_auth @anon_protect @@ -330,7 +296,7 @@ def get_search(): results = [] if query: - conduct_repo_search(username, query, results) + _conduct_repo_search(username, query, results) data = { "query": query, @@ -341,3 +307,23 @@ def get_search(): resp = make_response(json.dumps(data), 200) resp.mimetype = 'application/json' return resp + + +def _conduct_repo_search(username, query, results): + """ Finds matching repositories. """ + def can_read(repo): + if repo.is_public: + return True + + return ReadRepositoryPermission(repo.namespace_name, repo.name).can() + + only_public = username is None + matching_repos = model.get_sorted_matching_repositories(query, only_public, can_read, limit=5) + + for repo in matching_repos: + results.append({ + 'name': repo.namespace_name + '/' + repo.name, + 'description': repo.description, + 'is_public': repo.is_public, + 'href': '/repository/' + repo.namespace_name + '/' + repo.name + }) diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 5bcdd89aa..2e9a92c09 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -1,49 +1,39 @@ import logging import json -from flask import make_response, request, session, Response, redirect, abort as flask_abort from functools import wraps from datetime import datetime from time import time +from flask import make_response, request, session, Response, redirect, abort as flask_abort + from app import storage as store, app from auth.auth import process_auth, extract_namespace_repo_from_session from auth.auth_context import get_authenticated_user -from auth.registry_jwt_auth import get_granted_username -from digest import checksums -from util.http import abort, exact_abort -from util.registry.filelike import SocketReader from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) +from auth.registry_jwt_auth import get_granted_username from data import model, database -from util.registry import gzipstream -from util.registry.torrent import PieceHasher +from data.interfaces.v1 import pre_oci_model as model +from digest import checksums from endpoints.v1 import v1_bp from endpoints.decorators import anon_protect +from util.http import abort, exact_abort +from util.registry.filelike import SocketReader +from 
util.registry import gzipstream from util.registry.replication import queue_storage_replication +from util.registry.torrent import PieceHasher logger = logging.getLogger(__name__) -def image_is_uploading(repo_image): - if repo_image is None: - return False - - return repo_image.storage.uploading - - -def set_uploading_flag(repo_image, is_image_uploading): - repo_image.storage.uploading = is_image_uploading - repo_image.storage.save() - - -def _finish_image(namespace, repository, repo_image): +def _finish_image(namespace, repository, image_id): # Checksum is ok, we remove the marker - set_uploading_flag(repo_image, False) + blob_ref = model.update_image_uploading(namespace, repository, image_id, False) # Send a job to the work queue to replicate the image layer. - queue_storage_replication(namespace, repo_image.storage) + queue_storage_replication(namespace, blob_ref) def require_completion(f): @@ -51,11 +41,9 @@ def require_completion(f): @wraps(f) def wrapper(namespace, repository, *args, **kwargs): image_id = kwargs['image_id'] - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if image_is_uploading(repo_image): + if model.is_image_uploading(namespace, repository, image_id): abort(400, 'Image %(image_id)s is being uploaded, retry later', - issue='upload-in-progress', image_id=kwargs['image_id']) - + issue='upload-in-progress', image_id=image_id) return f(namespace, repository, *args, **kwargs) return wrapper @@ -94,19 +82,18 @@ def head_image_layer(namespace, repository, image_id, headers): permission = ReadRepositoryPermission(namespace, repository) logger.debug('Checking repo permissions') - if permission.can() or model.repository.repository_is_public(namespace, repository): - logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image: - logger.debug('Image not found') + if permission.can() or model.repository_is_public(namespace, repository): + logger.debug('Looking up placement locations') + locations = model.placement_locations_docker_v1(namespace, repository, image_id) + if locations is None: + logger.debug('Could not find any blob placement locations') abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - extra_headers = {} - # Add the Accept-Ranges header if the storage engine supports resumable # downloads. 
- if store.get_supports_resumable_downloads(repo_image.storage.locations): + extra_headers = {} + if store.get_supports_resumable_downloads(locations): logger.debug('Storage supports resumable downloads') extra_headers['Accept-Ranges'] = 'bytes' @@ -128,30 +115,24 @@ def get_image_layer(namespace, repository, image_id, headers): permission = ReadRepositoryPermission(namespace, repository) logger.debug('Checking repo permissions') - if permission.can() or model.repository.repository_is_public(namespace, repository): - logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image: - logger.debug('Image not found') + if permission.can() or model.repository_is_public(namespace, repository): + logger.debug('Looking up placement locations and path') + locations, path = model.placement_locations_and_path_docker_v1(namespace, repository, image_id) + if not locations or not path: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - try: - path = model.storage.get_layer_path(repo_image.storage) logger.debug('Looking up the direct download URL for path: %s', path) - direct_download_url = store.get_direct_download_url(repo_image.storage.locations, path) - + direct_download_url = store.get_direct_download_url(locations, path) if direct_download_url: logger.debug('Returning direct download URL') resp = redirect(direct_download_url) return resp - logger.debug('Streaming layer data') - # Close the database handle here for this process before we send the long download. database.close_db_filter(None) - - return Response(store.stream_read(repo_image.storage.locations, path), headers=headers) + logger.debug('Streaming layer data') + return Response(store.stream_read(locations, path), headers=headers) except (IOError, AttributeError): logger.exception('Image layer data not found') abort(404, 'Image %(image_id)s not found', issue='unknown-image', @@ -171,23 +152,13 @@ def put_image_layer(namespace, repository, image_id): abort(403) logger.debug('Retrieving image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - try: - logger.debug('Retrieving image data') - uuid = repo_image.storage.uuid - json_data = repo_image.v1_json_metadata - except (AttributeError): - logger.exception('Exception when retrieving image data') - abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - - uuid = repo_image.storage.uuid - layer_path = store.v1_image_layer_path(uuid) - logger.info('Storing layer at v1 path: %s', layer_path) - - if (store.exists(repo_image.storage.locations, layer_path) and not - image_is_uploading(repo_image)): + if model.storage_exists(namespace, repository, image_id): exact_abort(409, 'Image already exists') + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) + if v1_metadata is None: + abort(404) + logger.debug('Storing layer data') input_stream = request.stream @@ -215,7 +186,7 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(piece_hasher.update) # Add a handler which computes the checksum. - h, sum_hndlr = checksums.simple_checksum_handler(json_data) + h, sum_hndlr = checksums.simple_checksum_handler(v1_metadata.compat_json) sr.add_handler(sum_hndlr) # Add a handler which computes the content checksum only @@ -223,19 +194,20 @@ def put_image_layer(namespace, repository, image_id): sr.add_handler(content_sum_hndlr) # Stream write the data to storage. 
+ locations, path = model.placement_locations_and_path_docker_v1(namespace, repository, image_id) with database.CloseForLongOperation(app.config): try: - store.stream_write(repo_image.storage.locations, layer_path, sr) + store.stream_write(locations, path, sr) except IOError: logger.exception('Exception when writing image data') abort(520, 'Image %(image_id)s could not be written. Please try again.', image_id=image_id) # Save the size of the image. - updated_storage = model.storage.set_image_storage_metadata(image_id, namespace, repository, - size_info.compressed_size, - size_info.uncompressed_size) - pieces_bytes = piece_hasher.final_piece_hashes() - model.storage.save_torrent_info(updated_storage, app.config['BITTORRENT_PIECE_SIZE'], pieces_bytes) + model.update_image_sizes(namespace, repository, image_id, size_info.compressed_size, + size_info.uncompressed_size) + + # Save the BitTorrent pieces. + model.create_bittorrent_pieces(namespace, repository, image_id, piece_hasher.final_piece_hashes()) # Append the computed checksum. csums = [] @@ -244,29 +216,26 @@ def put_image_layer(namespace, repository, image_id): try: if requires_tarsum: tmp.seek(0) - csums.append(checksums.compute_tarsum(tmp, json_data)) + csums.append(checksums.compute_tarsum(tmp, v1_metadata.compat_json)) tmp.close() - except (IOError, checksums.TarError) as exc: logger.debug('put_image_layer: Error when computing tarsum %s', exc) - if repo_image.v1_checksum is None: + if v1_metadata.checksum is None: # We don't have a checksum stored yet, that's fine skipping the check. # Not removing the mark though, image is not downloadable yet. session['checksum'] = csums session['content_checksum'] = 'sha256:{0}'.format(ch.hexdigest()) return make_response('true', 200) - checksum = repo_image.v1_checksum - # We check if the checksums provided matches one the one we computed - if checksum not in csums: + if v1_metadata.checksum not in csums: logger.warning('put_image_layer: Wrong checksum') abort(400, 'Checksum mismatch; ignoring the layer for image %(image_id)s', issue='checksum-mismatch', image_id=image_id) # Mark the image as uploaded. 
- _finish_image(namespace, repository, repo_image) + _finish_image(namespace, repository, image_id) return make_response('true', 200) @@ -303,24 +272,27 @@ def put_image_checksum(namespace, repository, image_id): issue='missing-checksum-cookie', image_id=image_id) logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image or not repo_image.storage: + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) + if not v1_metadata: abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Looking up repo layer data') - if not repo_image.v1_json_metadata: + if not v1_metadata.compat_json: abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Marking image path') - if not image_is_uploading(repo_image): + if not model.is_image_uploading(namespace, repository, image_id): abort(409, 'Cannot set checksum for image %(image_id)s', issue='image-write-error', image_id=image_id) logger.debug('Storing image and content checksums') + content_checksum = session.get('content_checksum', None) - err = store_checksum(repo_image, checksum, content_checksum) - if err: - abort(400, err) + checksum_parts = checksum.split(':') + if len(checksum_parts) != 2: + abort(400, 'Invalid checksum format') + + model.store_docker_v1_checksums(namespace, repository, image_id, checksum, content_checksum) if checksum not in session.get('checksum', []): logger.debug('session checksums: %s', session.get('checksum', [])) @@ -330,7 +302,7 @@ def put_image_checksum(namespace, repository, image_id): issue='checksum-mismatch', image_id=image_id) # Mark the image as uploaded. - _finish_image(namespace, repository, repo_image) + _finish_image(namespace, repository, image_id) return make_response('true', 200) @@ -344,22 +316,22 @@ def put_image_checksum(namespace, repository, image_id): def get_image_json(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') permission = ReadRepositoryPermission(namespace, repository) - if not permission.can() and not model.repository.repository_is_public(namespace, repository): + if not permission.can() and not model.repository_is_public(namespace, repository): abort(403) logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if repo_image is None: + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) + if v1_metadata is None: flask_abort(404) logger.debug('Looking up repo layer size') - size = repo_image.storage.image_size + size = model.get_image_size(namespace, repository, image_id) if size is not None: # Note: X-Docker-Size is optional and we *can* end up with a NULL image_size, # so handle this case rather than failing. 
headers['X-Docker-Size'] = str(size) - response = make_response(repo_image.v1_json_metadata, 200) + response = make_response(v1_metadata.compat_json, 200) response.headers.extend(headers) return response @@ -373,38 +345,19 @@ def get_image_json(namespace, repository, image_id, headers): def get_image_ancestry(namespace, repository, image_id, headers): logger.debug('Checking repo permissions') permission = ReadRepositoryPermission(namespace, repository) - if not permission.can() and not model.repository.repository_is_public(namespace, repository): + if not permission.can() and not model.repository_is_public(namespace, repository): abort(403) - try: - image = model.image.get_image_by_id(namespace, repository, image_id) - except model.InvalidImageException: + ancestry_docker_ids = model.image_ancestry(namespace, repository, image_id) + if ancestry_docker_ids is None: abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - parents = model.image.get_parent_images(namespace, repository, image) - - ancestry_docker_ids = [image.docker_image_id] - ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) - # We can not use jsonify here because we are returning a list not an object response = make_response(json.dumps(ancestry_docker_ids), 200) response.headers.extend(headers) return response -def store_checksum(image_with_storage, checksum, content_checksum): - checksum_parts = checksum.split(':') - if len(checksum_parts) != 2: - return 'Invalid checksum format' - - # We store the checksum - image_with_storage.storage.content_checksum = content_checksum - image_with_storage.storage.save() - - image_with_storage.v1_checksum = checksum - image_with_storage.save() - - @v1_bp.route('/images//json', methods=['PUT']) @process_auth @extract_namespace_repo_from_session @@ -417,8 +370,8 @@ def put_image_json(namespace, repository, image_id): logger.debug('Parsing image JSON') try: - v1_metadata = request.data - data = json.loads(v1_metadata.decode('utf8')) + uploaded_metadata = request.data + data = json.loads(uploaded_metadata.decode('utf8')) except ValueError: pass @@ -430,48 +383,45 @@ def put_image_json(namespace, repository, image_id): abort(400, 'Missing key `id` in JSON for image: %(image_id)s', issue='invalid-request', image_id=image_id) - logger.debug('Looking up repo image') - - repo = model.repository.get_repository(namespace, repository) - if repo is None: - abort(404, 'Repository does not exist: %(namespace)s/%(repository)s', issue='no-repo', - namespace=namespace, repository=repository) - - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image: - username = get_authenticated_user() and get_authenticated_user().username - if not username: - username = get_granted_username() - - logger.debug('Image not found, creating image with initiating user context: %s', username) - repo_image = model.image.find_create_or_link_image(image_id, repo, username, {}, - store.preferred_locations[0]) - - # Create a temporary tag to prevent this image from getting garbage collected while the push - # is in progress. 
- model.tag.create_temporary_hidden_tag(repo, repo_image, - app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) - if image_id != data['id']: abort(400, 'JSON data contains invalid id for image: %(image_id)s', issue='invalid-request', image_id=image_id) - parent_id = data.get('parent', None) + logger.debug('Looking up repo image') - parent_image = None + if not model.repository_exists(namespace, repository): + abort(404, 'Repository does not exist: %(namespace)s/%(repository)s', issue='no-repo', + namespace=namespace, repository=repository) + + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) + if v1_metadata is None: + username = get_authenticated_user() and get_authenticated_user().username + if not username: + username = get_granted_username() + + logger.debug('Image not found, creating or linking image with initiating user context: %s', + username) + location_pref = store.preferred_locations[0] + model.create_or_link_image(username, namespace, repository, image_id, location_pref) + v1_metadata = model.docker_v1_metadata(namespace, repository, image_id) + + # Create a temporary tag to prevent this image from getting garbage collected while the push + # is in progress. + model.create_temp_hidden_tag(namespace, repository, image_id, + app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) + + parent_id = data.get('parent', None) if parent_id: logger.debug('Looking up parent image') - parent_image = model.image.get_repo_image_extended(namespace, repository, parent_id) - - if not parent_image or parent_image.storage.uploading: + if model.docker_v1_metadata(namespace, repository, parent_id) is None: abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', issue='invalid-request', image_id=image_id, parent_id=parent_id) logger.debug('Checking if image already exists') - if repo_image.v1_json_metadata and not image_is_uploading(repo_image): + if v1_metadata and not model.is_image_uploading(namespace, repository, image_id): exact_abort(409, 'Image already exists') - set_uploading_flag(repo_image, True) + model.update_image_uploading(namespace, repository, image_id, True) # If we reach that point, it means that this is a new image or a retry # on a failed push, save the metadata @@ -479,8 +429,7 @@ def put_image_json(namespace, repository, image_id): command = json.dumps(command_list) if command_list else None logger.debug('Setting image metadata') - model.image.set_image_metadata(image_id, namespace, repository, data.get('created'), - data.get('comment'), command, v1_metadata, parent_image) + model.update_docker_v1_metadata(namespace, repository, image_id, data.get('created'), + data.get('comment'), command, uploaded_metadata, parent_id) return make_response('true', 200) - diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index da730748d..973464683 100644 --- a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -3,11 +3,13 @@ import json from flask import abort, request, jsonify, make_response, session + from util.names import TAG_ERROR, TAG_REGEX from auth.auth import process_auth from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from data import model +from data.interfaces.v1 import pre_oci_model as model from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v1 import v1_bp @@ -24,8 +26,8 @@ logger = logging.getLogger(__name__) def get_tags(namespace_name, repo_name): permission = ReadRepositoryPermission(namespace_name, repo_name) - if permission.can() or 
model.repository.repository_is_public(namespace_name, repo_name): - tags = model.tag.list_repository_tags(namespace_name, repo_name) + if permission.can() or model.repository_is_public(namespace_name, repo_name): + tags = model.list_tags(namespace_name, repo_name) tag_map = {tag.name: tag.image.docker_image_id for tag in tags} return jsonify(tag_map) @@ -39,13 +41,12 @@ def get_tags(namespace_name, repo_name): def get_tag(namespace_name, repo_name, tag): permission = ReadRepositoryPermission(namespace_name, repo_name) - if permission.can() or model.repository.repository_is_public(namespace_name, repo_name): - try: - tag_image = model.tag.get_tag_image(namespace_name, repo_name, tag) - except model.DataModelException: + if permission.can() or model.repository_is_public(namespace_name, repo_name): + image_id = model.find_image_id_by_tag(namespace_name, repo_name, tag) + if image_id is None: abort(404) - resp = make_response('"%s"' % tag_image.docker_image_id) + resp = make_response('"%s"' % image_id) resp.headers['Content-Type'] = 'application/json' return resp @@ -63,14 +64,14 @@ def put_tag(namespace_name, repo_name, tag): if not TAG_REGEX.match(tag): abort(400, TAG_ERROR) - docker_image_id = json.loads(request.data) - model.tag.create_or_update_tag(namespace_name, repo_name, tag, docker_image_id) + image_id = json.loads(request.data) + model.create_or_update_tag(namespace_name, repo_name, image_id, tag) # Store the updated tag. if 'pushed_tags' not in session: session['pushed_tags'] = {} - session['pushed_tags'][tag] = docker_image_id + session['pushed_tags'][tag] = image_id return make_response('Created', 200) @@ -85,9 +86,8 @@ def delete_tag(namespace_name, repo_name, tag): permission = ModifyRepositoryPermission(namespace_name, repo_name) if permission.can(): - model.tag.delete_tag(namespace_name, repo_name, tag) - track_and_log('delete_tag', model.repository.get_repository(namespace_name, repo_name), - tag=tag) + model.delete_tag(namespace_name, repo_name, tag) + track_and_log('delete_tag', model.get_repository(namespace_name, repo_name), tag=tag) return make_response('Deleted', 200) abort(403) diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py index 1ab42747f..07e325df9 100644 --- a/endpoints/v2/__init__.py +++ b/endpoints/v2/__init__.py @@ -2,13 +2,14 @@ import logging from functools import wraps from urlparse import urlparse +from urllib import urlencode from flask import Blueprint, make_response, url_for, request, jsonify from semantic_version import Spec import features -from app import app, metric_queue +from app import app, metric_queue, get_app_url from auth.auth_context import get_grant_context from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission, AdministerRepositoryPermission) @@ -19,12 +20,56 @@ from endpoints.v2.errors import V2RegistryException, Unauthorized from util.http import abort from util.registry.dockerver import docker_version from util.metrics.metricqueue import time_blueprint +from util.pagination import encrypt_page_token, decrypt_page_token logger = logging.getLogger(__name__) v2_bp = Blueprint('v2', __name__) time_blueprint(v2_bp, metric_queue) + +_MAX_RESULTS_PER_PAGE = 50 + + +def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset', + callback_kwarg_name='pagination_callback'): + """ + Decorates a handler adding a parsed pagination token and a callback to encode a response token. 
+ """ + def wrapper(func): + @wraps(func) + def wrapped(*args, **kwargs): + try: + requested_limit = int(request.args.get('n', _MAX_RESULTS_PER_PAGE)) + except ValueError: + requested_limit = 0 + + limit = max(min(requested_limit, _MAX_RESULTS_PER_PAGE), 1) + next_page_token = request.args.get('next_page', None) + + # Decrypt the next page token, if any. + offset = 0 + page_info = decrypt_page_token(next_page_token) + if page_info is not None: + # Note: we use offset here instead of ID >= n because one of the V2 queries is a UNION. + offset = page_info.get('offset', 0) + + def callback(num_results, response): + if num_results <= limit: + return + next_page_token = encrypt_page_token({'offset': limit + offset}) + link = get_app_url() + url_for(request.endpoint, **request.view_args) + link += '?%s; rel="next"' % urlencode({'n': limit, 'next_page': next_page_token}) + response.headers['Link'] = link + + kwargs[limit_kwarg_name] = limit + kwargs[offset_kwarg_name] = offset + kwargs[callback_kwarg_name] = callback + return func(*args, **kwargs) + return wrapped + return wrapper + + @v2_bp.app_errorhandler(V2RegistryException) def handle_registry_v2_exception(error): response = jsonify({ @@ -104,8 +149,10 @@ def v2_support_enabled(): return response -from endpoints.v2 import v2auth -from endpoints.v2 import manifest -from endpoints.v2 import blob -from endpoints.v2 import tag -from endpoints.v2 import catalog +from endpoints.v2 import ( + blob, + catalog, + manifest, + tag, + v2auth, +) diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index 3136b4580..79113bad0 100644 --- a/endpoints/v2/blob.py +++ b/endpoints/v2/blob.py @@ -1,13 +1,14 @@ import logging import re -from flask import make_response, url_for, request, redirect, Response, abort as flask_abort +from flask import url_for, request, redirect, Response, abort as flask_abort import resumablehashlib from app import storage, app from auth.registry_jwt_auth import process_registry_jwt_auth -from data import model, database +from data import database +from data.interfaces.v2 import pre_oci_model as model from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream @@ -33,28 +34,6 @@ class _InvalidRangeHeader(Exception): pass -def _base_blob_fetch(namespace_name, repo_name, digest): - """ Some work that is common to both GET and HEAD requests. Callers MUST check for proper - authorization before calling this method. - """ - try: - found = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) - except model.BlobDoesNotExist: - raise BlobUnknown() - - headers = { - 'Docker-Content-Digest': digest, - } - - # Add the Accept-Ranges header if the storage engine supports resumable - # downloads. - if storage.get_supports_resumable_downloads(found.locations): - logger.debug('Storage supports resumable downloads') - headers['Accept-Ranges'] = 'bytes' - - return found, headers - - @v2_bp.route(BLOB_DIGEST_ROUTE, methods=['HEAD']) @parse_repository_name() @process_registry_jwt_auth(scopes=['pull']) @@ -62,13 +41,24 @@ def _base_blob_fetch(namespace_name, repo_name, digest): @anon_protect @cache_control(max_age=31436000) def check_blob_exists(namespace_name, repo_name, digest): - found, headers = _base_blob_fetch(namespace_name, repo_name, digest) + # Find the blob. 
+  blob = model.get_blob_by_digest(namespace_name, repo_name, digest)
+  if blob is None:
+    raise BlobUnknown()
 
-  response = make_response('')
-  response.headers.extend(headers)
-  response.headers['Content-Length'] = found.image_size
-  response.headers['Content-Type'] = BLOB_CONTENT_TYPE
-  return response
+  # Build the response headers.
+  headers = {
+    'Docker-Content-Digest': digest,
+    'Content-Length': blob.size,
+    'Content-Type': BLOB_CONTENT_TYPE,
+  }
+
+  # If our storage supports range requests, let the client know.
+  if storage.get_supports_resumable_downloads(blob.locations):
+    headers['Accept-Ranges'] = 'bytes'
+
+  # Write the response to the client.
+  return Response(headers=headers)
 
 
 @v2_bp.route(BLOB_DIGEST_ROUTE, methods=['GET'])
@@ -78,31 +68,42 @@ def check_blob_exists(namespace_name, repo_name, digest):
 @anon_protect
 @cache_control(max_age=31536000)
 def download_blob(namespace_name, repo_name, digest):
-  found, headers = _base_blob_fetch(namespace_name, repo_name, digest)
+  # Find the blob.
+  blob = model.get_blob_by_digest(namespace_name, repo_name, digest)
+  if blob is None:
+    raise BlobUnknown()
 
-  path = model.storage.get_layer_path(found)
+  # Build the response headers.
+  headers = {'Docker-Content-Digest': digest}
+
+  # If our storage supports range requests, let the client know.
+  if storage.get_supports_resumable_downloads(blob.locations):
+    headers['Accept-Ranges'] = 'bytes'
+
+  # Find the storage path for the blob.
+  path = model.get_blob_path(blob)
+
+  # Short-circuit by redirecting if the storage supports it.
   logger.debug('Looking up the direct download URL for path: %s', path)
-  direct_download_url = storage.get_direct_download_url(found.locations, path)
-
+  direct_download_url = storage.get_direct_download_url(blob.locations, path)
   if direct_download_url:
     logger.debug('Returning direct download URL')
     resp = redirect(direct_download_url)
     resp.headers.extend(headers)
     return resp
 
-  logger.debug('Streaming layer data')
+  # Close the database connection before we stream the download.
+  logger.debug('Closing database connection before streaming layer data')
+  with database.CloseForLongOperation(app.config):
+    # Stream the response to the client. Note: dict.update() returns None,
+    # so the extra headers are merged in before the Response is built.
+    headers.update({
+      'Content-Length': blob.size,
+      'Content-Type': BLOB_CONTENT_TYPE,
+    })
+    return Response(storage.stream_read(blob.locations, path),
+                    headers=headers)
 
-  # Close the database handle here for this process before we send the long download.
-  database.close_db_filter(None)
-
-  headers['Content-Length'] = found.image_size
-  headers['Content-Type'] = BLOB_CONTENT_TYPE
-
-  return Response(storage.stream_read(found.locations, path), headers=headers)
-
-
-def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
-  return '{0}0-{1}'.format('bytes=' if with_bytes_prefix else '', num_uploaded_bytes - 1)
 
 
 @v2_bp.route('/<repopath:repository>/blobs/uploads/', methods=['POST'])
@@ -111,37 +112,54 @@ def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
 @parse_repository_name()
 @process_registry_jwt_auth(scopes=['pull', 'push'])
 @require_repo_write
 @anon_protect
 def start_blob_upload(namespace_name, repo_name):
+  # Begin the blob upload process in the database and storage.
  location_name = storage.preferred_locations[0]
   new_upload_uuid, upload_metadata = storage.initiate_chunked_upload(location_name)
-
-  try:
-    model.blob.initiate_upload(namespace_name, repo_name, new_upload_uuid, location_name,
-                               upload_metadata)
-  except database.Repository.DoesNotExist:
+  repository_exists = model.create_blob_upload(namespace_name, repo_name, new_upload_uuid,
+                                               location_name, upload_metadata)
+  if not repository_exists:
     raise NameUnknown()
 
   digest = request.args.get('digest', None)
   if digest is None:
-    # The user will send the blob data in another request
-    accepted = make_response('', 202)
-    accepted.headers['Location'] = url_for('v2.upload_chunk',
-                                           repository='%s/%s' % (namespace_name, repo_name),
-                                           upload_uuid=new_upload_uuid)
+    # Short-circuit because the user will send the blob data in another request.
+    return Response(
+      status=202,
+      headers={
+        'Docker-Upload-UUID': new_upload_uuid,
+        'Range': _render_range(0),
+        'Location': url_for('v2.upload_chunk', repository='%s/%s' % (namespace_name, repo_name),
+                            upload_uuid=new_upload_uuid)
+      },
+    )
 
-    accepted.headers['Range'] = _render_range(0)
-    accepted.headers['Docker-Upload-UUID'] = new_upload_uuid
-    return accepted
-  else:
-    # The user plans to send us the entire body right now
-    blob_upload, upload_error = _upload_chunk(namespace_name, repo_name, new_upload_uuid)
-    blob_upload.save()
+  # The user plans to send us the entire body right now.
+  # Find the upload.
+  blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, new_upload_uuid)
+  if blob_upload is None:
+    raise BlobUploadUnknown()
 
-    if upload_error:
-      logger.error('Got error when uploading chunk for blob %s under repository %s/%s: %s',
-                   namespace_name, repo_name, new_upload_uuid, upload_error)
-      _range_not_satisfiable(blob_upload.byte_count)
+  # Upload the chunk to storage while calculating some metadata and updating
+  # the upload state.
+  updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range'))
+  if updated_blob_upload is None:
+    _abort_range_not_satisfiable(blob_upload.byte_count, new_upload_uuid)
 
-    return _finish_upload(namespace_name, repo_name, blob_upload, digest)
+  # Save the upload state to the database.
+  model.update_blob_upload(updated_blob_upload)
+
+  # Finalize the upload process in the database and storage.
+  _finish_upload(namespace_name, repo_name, updated_blob_upload, digest)
+
+  # Write the response to the client.
+  return Response(
+    status=201,
+    headers={
+      'Docker-Content-Digest': digest,
+      'Location': url_for('v2.download_blob', repository='%s/%s' % (namespace_name, repo_name),
+                          digest=digest),
    },
+  )
 
 
 @v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['GET'])
@@ -150,33 +168,144 @@ def start_blob_upload(namespace_name, repo_name):
 @require_repo_write
 @anon_protect
 def fetch_existing_upload(namespace_name, repo_name, upload_uuid):
-  try:
-    found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid)
-  except model.InvalidBlobUpload:
+  blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if blob_upload is None:
     raise BlobUploadUnknown()
 
-  # Note: Docker byte ranges are exclusive so we have to add one to the byte count.
-  accepted = make_response('', 204)
-  accepted.headers['Range'] = _render_range(found.byte_count + 1)
-  accepted.headers['Docker-Upload-UUID'] = upload_uuid
-  return accepted
+  return Response(
+    status=204,
+    headers={
+      'Docker-Upload-UUID': upload_uuid,
+      'Range': _render_range(blob_upload.byte_count+1),  # byte ranges are exclusive
+    },
+  )
+
+
+@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['PATCH'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def upload_chunk(namespace_name, repo_name, upload_uuid):
+  # Find the upload.
+  blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if blob_upload is None:
+    raise BlobUploadUnknown()
+
+  # Upload the chunk to storage while calculating some metadata and updating
+  # the upload state.
+  updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range'))
+  if updated_blob_upload is None:
+    _abort_range_not_satisfiable(blob_upload.byte_count, upload_uuid)
+
+  # Save the upload state to the database.
+  model.update_blob_upload(updated_blob_upload)
+
+  # Write the response to the client.
+  return Response(
+    status=204,
+    headers={
+      'Location': _current_request_path(),
+      'Range': _render_range(updated_blob_upload.byte_count, with_bytes_prefix=False),
+      'Docker-Upload-UUID': upload_uuid,
+    },
+  )
+
+
+@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['PUT'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid):
+  # Ensure the digest is present before proceeding.
+  digest = request.args.get('digest', None)
+  if digest is None:
+    raise BlobUploadInvalid(detail={'reason': 'Missing digest arg on monolithic upload'})
+
+  # Find the upload.
+  blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if blob_upload is None:
+    raise BlobUploadUnknown()
+
+  # Upload the chunk to storage while calculating some metadata and updating
+  # the upload state.
+  updated_blob_upload = _upload_chunk(blob_upload, request.headers.get('range'))
+  if updated_blob_upload is None:
+    _abort_range_not_satisfiable(blob_upload.byte_count, upload_uuid)
+
+  # Finalize the upload process in the database and storage.
+  _finish_upload(namespace_name, repo_name, updated_blob_upload, digest)
+
+  # Write the response to the client.
+  return Response(
+    status=201,
+    headers={
+      'Docker-Content-Digest': digest,
+      'Location': url_for('v2.download_blob', repository='%s/%s' % (namespace_name, repo_name),
+                          digest=digest),
+    }
+  )
+
+
+@v2_bp.route('/<repopath:repository>/blobs/uploads/<upload_uuid>', methods=['DELETE'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def cancel_upload(namespace_name, repo_name, upload_uuid):
+  blob_upload = model.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
+  if blob_upload is None:
+    raise BlobUploadUnknown()
+
+  # We delete the record for the upload first, since if the partial upload in
+  # storage fails to delete, it doesn't break anything.
+  model.delete_blob_upload(namespace_name, repo_name, upload_uuid)
+  storage.cancel_chunked_upload({blob_upload.location_name}, blob_upload.uuid,
+                                blob_upload.storage_metadata)
+
+  return Response(status=204)
+
+
+@v2_bp.route('/<repopath:repository>/blobs/<digest>', methods=['DELETE'])
+@parse_repository_name()
+@process_registry_jwt_auth(scopes=['pull', 'push'])
+@require_repo_write
+@anon_protect
+def delete_digest(namespace_name, repo_name, digest):
+  # We do not support deleting arbitrary digests, as they break repo images.
+  raise Unsupported()
+
+
+def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
+  """
+  Returns a string formatted to be used in the Range header.
+  """
+  return '{0}0-{1}'.format('bytes=' if with_bytes_prefix else '', num_uploaded_bytes - 1)
 
 
 def _current_request_path():
   return '{0}{1}'.format(request.script_root, request.path)
 
 
-def _range_not_satisfiable(valid_end):
-  invalid_range = make_response('', 416)
-  invalid_range.headers['Location'] = _current_request_path()
-  invalid_range.headers['Range'] = '0-{0}'.format(valid_end)
-  invalid_range.headers['Docker-Upload-UUID'] = request.view_args['upload_uuid']
-  flask_abort(invalid_range)
+def _abort_range_not_satisfiable(valid_end, upload_uuid):
+  """
+  Writes a failure response for scenarios where the registry cannot function
+  with the provided range.
+
+  TODO(jzelinskie): Unify this with the V2RegistryException class.
+  """
+  flask_abort(Response(status=416, headers={'Location': _current_request_path(),
+                                            'Range': '0-{0}'.format(valid_end),
+                                            'Docker-Upload-UUID': upload_uuid}))
 
 
 def _parse_range_header(range_header_text):
-  """ Parses the range header, and returns a tuple of the start offset and the length,
-      or raises an _InvalidRangeHeader exception.
+  """
+  Parses the range header.
+
+  Returns a tuple of the start offset and the length.
+  If the parse fails, raises _InvalidRangeHeader.
   """
   found = RANGE_HEADER_REGEX.match(range_header_text)
   if found is None:
@@ -191,208 +320,183 @@ def _parse_range_header(range_header_text):
   return (start, length)
 
 
-def _upload_chunk(namespace_name, repo_name, upload_uuid):
-  """ Common code among the various uploading paths for appending data to blobs.
-      Callers MUST call .save() or .delete_instance() on the returned database object.
-      Returns the BlobUpload object and the error that occurred, if any (or None if none).
+def _start_offset_and_length(range_header):
+  """
+  Returns a tuple of the start offset and the length.
+  If the range header doesn't exist, defaults to (0, -1).
+  If parsing fails, returns (None, None).
   """
-  try:
-    found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid)
-  except model.InvalidBlobUpload:
-    raise BlobUploadUnknown()
-
   start_offset, length = 0, -1
-  range_header = request.headers.get('range', None)
   if range_header is not None:
     try:
       start_offset, length = _parse_range_header(range_header)
     except _InvalidRangeHeader:
-      _range_not_satisfiable(found.byte_count)
+      return None, None
 
-  if start_offset > 0 and start_offset > found.byte_count:
-    _range_not_satisfiable(found.byte_count)
+  return start_offset, length
 
-  location_set = {found.location.name}
+
+def _upload_chunk(blob_upload, range_header):
+  """
+  Calculates metadata while uploading a chunk to storage.
+
+  Returns a BlobUpload object or None if there was a failure.
+  """
+  # Get the offset and length of the current chunk.
+ start_offset, length = _start_offset_and_length(range_header) + if blob_upload is None or None in {start_offset, length}: + logger.error('Invalid arguments provided to _upload_chunk') + return None + + if start_offset > 0 and start_offset > blob_upload.byte_count: + logger.error('start_offset provided to _upload_chunk greater than blob.upload.byte_count') + return None + + location_set = {blob_upload.location_name} upload_error = None with database.CloseForLongOperation(app.config): input_fp = get_input_stream(request) - if start_offset > 0 and start_offset < found.byte_count: + if start_offset > 0 and start_offset < blob_upload.byte_count: # Skip the bytes which were received on a previous push, which are already stored and # included in the sha calculation - overlap_size = found.byte_count - start_offset + overlap_size = blob_upload.byte_count - start_offset input_fp = StreamSlice(input_fp, overlap_size) # Update our upload bounds to reflect the skipped portion of the overlap - start_offset = found.byte_count + start_offset = blob_upload.byte_count length = max(length - overlap_size, 0) # We use this to escape early in case we have already processed all of the bytes the user # wants to upload if length == 0: - return found, None + return blob_upload - input_fp = wrap_with_handler(input_fp, found.sha_state.update) + input_fp = wrap_with_handler(input_fp, blob_upload.sha_state.update) # Add a hasher for calculating SHA1s for torrents if this is the first chunk and/or we have # already calculated hash data for the previous chunk(s). piece_hasher = None - if found.chunk_count == 0 or found.piece_sha_state: - initial_sha1_value = found.piece_sha_state or resumablehashlib.sha1() - initial_sha1_pieces_value = found.piece_hashes or '' + if blob_upload.chunk_count == 0 or blob_upload.piece_sha_state: + initial_sha1_value = blob_upload.piece_sha_state or resumablehashlib.sha1() + initial_sha1_pieces_value = blob_upload.piece_hashes or '' piece_hasher = PieceHasher(app.config['BITTORRENT_PIECE_SIZE'], start_offset, - initial_sha1_pieces_value, - initial_sha1_value) + initial_sha1_pieces_value, initial_sha1_value) input_fp = wrap_with_handler(input_fp, piece_hasher.update) # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip the # stream so we can determine the uncompressed size. We'll throw out this data if another chunk - # comes in, but in the common case Docker only sends one chunk. + # comes in, but in the common case the docker client only sends one chunk. size_info = None - if start_offset == 0 and found.chunk_count == 0: + if start_offset == 0 and blob_upload.chunk_count == 0: size_info, fn = calculate_size_handler() input_fp = wrap_with_handler(input_fp, fn) - chunk_result = storage.stream_upload_chunk(location_set, upload_uuid, start_offset, length, - input_fp, found.storage_metadata, - content_type=BLOB_CONTENT_TYPE) - length_written, new_metadata, upload_error = chunk_result + length_written, new_metadata, upload_error = storage.stream_upload_chunk( + location_set, + blob_upload.uuid, + start_offset, + length, + input_fp, + blob_upload.storage_metadata, + content_type=BLOB_CONTENT_TYPE, + ) + + if upload_error is not None: + logger.error('storage.stream_upload_chunk returned error %s', upload_error) + return None # If we determined an uncompressed size and this is the first chunk, add it to the blob. # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks. 
- if size_info is not None and found.chunk_count == 0 and size_info.is_valid: - found.uncompressed_byte_count = size_info.uncompressed_size + if size_info is not None and blob_upload.chunk_count == 0 and size_info.is_valid: + blob_upload.uncompressed_byte_count = size_info.uncompressed_size elif length_written > 0: # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't # know the uncompressed size. - found.uncompressed_byte_count = None + blob_upload.uncompressed_byte_count = None if piece_hasher is not None: - found.piece_hashes = piece_hasher.piece_hashes - found.piece_sha_state = piece_hasher.hash_fragment + blob_upload.piece_hashes = piece_hasher.piece_hashes + blob_upload.piece_sha_state = piece_hasher.hash_fragment - found.storage_metadata = new_metadata - found.byte_count += length_written - found.chunk_count += 1 - return found, upload_error + blob_upload.storage_metadata = new_metadata + blob_upload.byte_count += length_written + blob_upload.chunk_count += 1 + return blob_upload -def _finish_upload(namespace_name, repo_name, upload_obj, expected_digest): - # Verify that the digest's SHA matches that of the uploaded data. - computed_digest = digest_tools.sha256_digest_from_hashlib(upload_obj.sha_state) +def _validate_digest(blob_upload, expected_digest): + """ + Verifies that the digest's SHA matches that of the uploaded data. + """ + computed_digest = digest_tools.sha256_digest_from_hashlib(blob_upload.sha_state) if not digest_tools.digests_equal(computed_digest, expected_digest): logger.error('Digest mismatch for upload %s: Expected digest %s, found digest %s', - upload_obj.uuid, expected_digest, computed_digest) + blob_upload.uuid, expected_digest, computed_digest) raise BlobUploadInvalid(detail={'reason': 'Digest mismatch on uploaded blob'}) + +def _finalize_blob_storage(blob_upload, expected_digest): + """ + When an upload is successful, this ends the uploading process from the + storage's perspective. + + Returns True if the blob already existed. + """ final_blob_location = digest_tools.content_path(expected_digest) # Move the storage into place, or if this was a re-upload, cancel it with database.CloseForLongOperation(app.config): - already_exists = storage.exists({upload_obj.location.name}, final_blob_location) - if already_exists: - # It already existed, clean up our upload which served as proof that we had the file - storage.cancel_chunked_upload({upload_obj.location.name}, upload_obj.uuid, - upload_obj.storage_metadata) + already_existed = storage.exists({blob_upload.location_name}, final_blob_location) + if already_existed: + # It already existed, clean up our upload which served as proof that the + # uploader had the blob. + storage.cancel_chunked_upload({blob_upload.location_name}, blob_upload.uuid, + blob_upload.storage_metadata) else: # We were the first ones to upload this image (at least to this location) # Let's copy it into place - storage.complete_chunked_upload({upload_obj.location.name}, upload_obj.uuid, - final_blob_location, upload_obj.storage_metadata) - - # Mark the blob as uploaded. 
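# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch): _validate_digest above only
# compares the digest accumulated from the uploaded chunks against the digest the
# client claimed. Conceptually it is no more than the following, assuming the usual
# "sha256:<hex>" content-addressable form used by the registry protocol:
import hashlib

def _sketch_digest(payload):
  return 'sha256:' + hashlib.sha256(payload).hexdigest()

expected_digest = _sketch_digest(b'layer-bytes')   # what the client sent in ?digest=
computed_digest = _sketch_digest(b'layer-bytes')   # what the streaming hasher produced
assert computed_digest == expected_digest          # otherwise BlobUploadInvalid is raised
# The real code uses digest_tools and the resumable sha_state gathered while the
# chunks streamed through, so the blob never has to be re-read from storage.
# ---------------------------------------------------------------------------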
- blob_storage = model.blob.store_blob_record_and_temp_link(namespace_name, repo_name, expected_digest, - upload_obj.location, - upload_obj.byte_count, - app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'], - upload_obj.uncompressed_byte_count) - - if upload_obj.piece_sha_state is not None and not already_exists: - piece_bytes = upload_obj.piece_hashes + upload_obj.piece_sha_state.digest() - model.storage.save_torrent_info(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes) - - # Delete the upload tracking row. - upload_obj.delete_instance() - - response = make_response('', 201) - response.headers['Docker-Content-Digest'] = expected_digest - response.headers['Location'] = url_for('v2.download_blob', - repository='%s/%s' % (namespace_name, repo_name), - digest=expected_digest) - return response + storage.complete_chunked_upload({blob_upload.location_name}, blob_upload.uuid, + final_blob_location, blob_upload.storage_metadata) + return already_existed -@v2_bp.route('//blobs/uploads/', methods=['PATCH']) -@parse_repository_name() -@process_registry_jwt_auth(scopes=['pull', 'push']) -@require_repo_write -@anon_protect -def upload_chunk(namespace_name, repo_name, upload_uuid): - blob_upload, upload_error = _upload_chunk(namespace_name, repo_name, upload_uuid) - blob_upload.save() +def _finalize_blob_database(namespace_name, repo_name, blob_upload, digest, already_existed): + """ + When an upload is successful, this ends the uploading process from the + database's perspective. + """ + # Create the blob and temporarily tag it. + blob_storage = model.create_blob_and_temp_tag( + namespace_name, + repo_name, + digest, + blob_upload, + app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'], + ) - if upload_error: - logger.error('Got error when uploading chunk for blob %s under repository %s/%s: %s', - namespace_name, repo_name, upload_uuid, upload_error) - _range_not_satisfiable(blob_upload.byte_count) + # If it doesn't already exist, create the BitTorrent pieces for the blob. + if blob_upload.piece_sha_state is not None and not already_existed: + piece_bytes = blob_upload.piece_hashes + blob_upload.piece_sha_state.digest() + model.save_bittorrent_pieces(blob_storage, app.config['BITTORRENT_PIECE_SIZE'], piece_bytes) - accepted = make_response('', 204) - accepted.headers['Location'] = _current_request_path() - accepted.headers['Range'] = _render_range(blob_upload.byte_count, with_bytes_prefix=False) - accepted.headers['Docker-Upload-UUID'] = upload_uuid - return accepted + # Delete the blob upload. 
+ model.delete_blob_upload(namespace_name, repo_name, blob_upload.uuid) -@v2_bp.route('//blobs/uploads/', methods=['PUT']) -@parse_repository_name() -@process_registry_jwt_auth(scopes=['pull', 'push']) -@require_repo_write -@anon_protect -def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid): - digest = request.args.get('digest', None) - if digest is None: - raise BlobUploadInvalid(detail={'reason': 'Missing digest arg on monolithic upload'}) - - blob_upload, upload_error = _upload_chunk(namespace_name, repo_name, upload_uuid) - blob_upload.save() - - if upload_error: - logger.error('Got error when uploading chunk for blob %s under repository %s/%s: %s', - namespace_name, repo_name, upload_uuid, upload_error) - _range_not_satisfiable(blob_upload.byte_count) - - return _finish_upload(namespace_name, repo_name, blob_upload, digest) - - -@v2_bp.route('//blobs/uploads/', methods=['DELETE']) -@parse_repository_name() -@process_registry_jwt_auth(scopes=['pull', 'push']) -@require_repo_write -@anon_protect -def cancel_upload(namespace_name, repo_name, upload_uuid): - try: - found = model.blob.get_blob_upload(namespace_name, repo_name, upload_uuid) - except model.InvalidBlobUpload: - raise BlobUploadUnknown() - - # We delete the record for the upload first, since if the partial upload in - # storage fails to delete, it doesn't break anything - found.delete_instance() - storage.cancel_chunked_upload({found.location.name}, found.uuid, found.storage_metadata) - - return make_response('', 204) - - - -@v2_bp.route('//blobs/', methods=['DELETE']) -@parse_repository_name() -@process_registry_jwt_auth(scopes=['pull', 'push']) -@require_repo_write -@anon_protect -def delete_digest(namespace_name, repo_name, upload_uuid): - # We do not support deleting arbitrary digests, as they break repo images. - raise Unsupported() +def _finish_upload(namespace_name, repo_name, blob_upload, digest): + """ + When an upload is successful, this ends the uploading process. 
+ """ + _validate_digest(blob_upload, digest) + _finalize_blob_database( + namespace_name, + repo_name, + blob_upload, + digest, + _finalize_blob_storage(blob_upload, digest), + ) diff --git a/endpoints/v2/catalog.py b/endpoints/v2/catalog.py index c49b4091a..8ae243460 100644 --- a/endpoints/v2/catalog.py +++ b/endpoints/v2/catalog.py @@ -1,30 +1,25 @@ -from flask import jsonify, url_for +from flask import jsonify -from endpoints.v2 import v2_bp from auth.registry_jwt_auth import process_registry_jwt_auth, get_granted_entity from endpoints.decorators import anon_protect -from data import model -from endpoints.v2.v2util import add_pagination +from endpoints.v2 import v2_bp, paginate +from data.interfaces.v2 import pre_oci_model as model @v2_bp.route('/_catalog', methods=['GET']) @process_registry_jwt_auth() @anon_protect -def catalog_search(): - url = url_for('v2.catalog_search') - +@paginate() +def catalog_search(limit, offset, pagination_callback): username = None entity = get_granted_entity() if entity: username = entity.user.username - query = model.repository.get_visible_repositories(username, include_public=(username is None)) - link, query = add_pagination(query, url) - + visible_repositories = model.get_visible_repositories(username, limit+1, offset) response = jsonify({ - 'repositories': ['%s/%s' % (repo.namespace_user.username, repo.name) for repo in query], + 'repositories': ['%s/%s' % (repo.namespace_name, repo.name) + for repo in visible_repositories][0:limit], }) - if link is not None: - response.headers['Link'] = link - + pagination_callback(len(visible_repositories), response) return response diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index b1302d1b8..ff3987620 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -1,34 +1,29 @@ import logging -import json -import hashlib -from collections import namedtuple, OrderedDict -from datetime import datetime from functools import wraps -import jwt.utils - -from peewee import IntegrityError -from flask import make_response, request, url_for -from jwkest.jws import SIGNER_ALGS, keyrep +from flask import request, url_for, Response import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth +from data.interfaces.v2 import pre_oci_model as model, Label +from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v2 import v2_bp, require_repo_read, require_repo_write from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown, TagInvalid, - NameInvalid, TagAlreadyExists) + NameInvalid) from endpoints.trackhelper import track_and_log from endpoints.notificationhelper import spawn_notification -from util.registry.replication import queue_storage_replication +from image.docker import ManifestException +from image.docker.schema1 import DockerSchema1Manifest, DockerSchema1ManifestBuilder +from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES from util.names import VALID_TAG_PATTERN -from digest import digest_tools -from data import model -from data.model import TagAlreadyCreatedException -from data.database import RepositoryTag +from util.registry.replication import queue_storage_replication +from util.validation import is_json + logger = logging.getLogger(__name__) @@ -37,236 +32,32 @@ BASE_MANIFEST_ROUTE = '//manifests/ 1: - self._namespace, self._repo_name = repo_name_tuple - elif len(repo_name_tuple) == 1: - 
self._namespace = '' - self._repo_name = repo_name_tuple[0] - else: - raise ValueError('repo_name has too many or too few pieces') - - if validate: - self._validate() - - def _validate(self): - for signature in self._signatures: - bytes_to_verify = '{0}.{1}'.format(signature['protected'], - jwt.utils.base64url_encode(self.payload)) - signer = SIGNER_ALGS[signature['header']['alg']] - key = keyrep(signature['header']['jwk']) - gk = key.get_key() - sig = jwt.utils.base64url_decode(signature['signature'].encode('utf-8')) - verified = signer.verify(bytes_to_verify, sig, gk) - if not verified: - raise ValueError('manifest file failed signature verification') - - @property - def signatures(self): - return self._signatures - - @property - def namespace(self): - return self._namespace - - @property - def repo_name(self): - return self._repo_name - - @property - def tag(self): - return self._tag - - @property - def bytes(self): - return self._bytes - - @property - def digest(self): - return digest_tools.sha256_digest(self.payload) - - @property - def layers(self): - """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, - starting from the base image and working toward the leaf node. - """ - for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY], - self._parsed[_HISTORY_KEY])): - - try: - image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY]) - except digest_tools.InvalidDigestException: - err_message = 'could not parse manifest digest: %s' % blob_sum_obj[_BLOB_SUM_KEY] - raise ManifestInvalid(detail={'message': err_message}) - - metadata_string = history_obj[_V1_COMPAT_KEY] - - v1_metadata = json.loads(metadata_string) - command_list = v1_metadata.get('container_config', {}).get('Cmd', None) - command = json.dumps(command_list) if command_list else None - - if not 'id' in v1_metadata: - raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'}) - - labels = v1_metadata.get('config', {}).get('Labels', {}) or {} - extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), - v1_metadata.get('created'), v1_metadata.get('comment'), - command, labels) - yield ImageMetadata(image_digest, extracted, metadata_string) - - @property - def payload(self): - protected = str(self._signatures[0][_PROTECTED_KEY]) - parsed_protected = json.loads(jwt.utils.base64url_decode(protected)) - signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]] - signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY])) - return signed_content_head + signed_content_tail - - -class SignedManifestBuilder(object): - """ Class which represents a manifest which is currently being built. - """ - def __init__(self, namespace_name, repo_name, tag, architecture='amd64', schema_ver=1): - repo_name_key = '{0}/{1}'.format(namespace_name, repo_name) - if namespace_name == '': - repo_name_key = repo_name - - self._base_payload = { - _REPO_TAG_KEY: tag, - _REPO_NAME_KEY: repo_name_key, - _ARCH_KEY: architecture, - _SCHEMA_VER: schema_ver, - } - - self._fs_layer_digests = [] - self._history = [] - - def add_layer(self, layer_digest, v1_json_metadata): - self._fs_layer_digests.append({ - _BLOB_SUM_KEY: layer_digest, - }) - self._history.append({ - _V1_COMPAT_KEY: v1_json_metadata, - }) - return self - - - def build(self, json_web_key): - """ Build the payload and sign it, returning a SignedManifest object. 
- """ - payload = OrderedDict(self._base_payload) - payload.update({ - _HISTORY_KEY: self._history, - _FS_LAYERS_KEY: self._fs_layer_digests, - }) - - payload_str = json.dumps(payload, indent=3) - - split_point = payload_str.rfind('\n}') - - protected_payload = { - 'formatTail': jwt.utils.base64url_encode(payload_str[split_point:]), - 'formatLength': split_point, - 'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU), - } - protected = jwt.utils.base64url_encode(json.dumps(protected_payload)) - logger.debug('Generated protected block: %s', protected) - - bytes_to_sign = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(payload_str)) - - signer = SIGNER_ALGS[JWS_ALGORITHM] - signature = jwt.utils.base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) - logger.debug('Generated signature: %s', signature) - - public_members = set(json_web_key.public_members) - public_key = {comp: value for comp, value in json_web_key.to_dict().items() - if comp in public_members} - - signature_block = { - 'header': { - 'jwk': public_key, - 'alg': JWS_ALGORITHM, - }, - 'signature': signature, - _PROTECTED_KEY: protected, - } - - logger.debug('Encoded signature block: %s', json.dumps(signature_block)) - - payload.update({ - _SIGNATURES_KEY: [signature_block], - }) - - return SignedManifest(json.dumps(payload, indent=3)) - - @v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET']) @parse_repository_name() @process_registry_jwt_auth(scopes=['pull']) @require_repo_read @anon_protect def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): - try: - manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: - try: - model.tag.get_active_tag(namespace_name, repo_name, manifest_ref) - except RepositoryTag.DoesNotExist: + manifest = model.get_manifest_by_tag(namespace_name, repo_name, manifest_ref) + if manifest is None: + has_tag = model.has_active_tag(namespace_name, repo_name, manifest_ref) + if not has_tag: raise ManifestUnknown() - try: - manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref) - except model.DataModelException: - logger.exception('Exception when generating manifest for %s/%s:%s', namespace_name, repo_name, - manifest_ref) + manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref) + if manifest is None: raise ManifestUnknown() - repo = model.repository.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', repo, analytics_name='pull_repo_100x', analytics_sample=0.01) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.json_data, 200) - response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE - response.headers['Docker-Content-Digest'] = manifest.digest - return response + return Response( + manifest.json, + status=200, + headers={'Content-Type': manifest.media_type, 'Docker-Content-Digest': manifest.digest}, + ) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['GET']) @@ -275,27 +66,24 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @require_repo_read @anon_protect def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): - try: - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: + manifest = model.get_manifest_by_digest(namespace_name, repo_name, manifest_ref) + if manifest is None: # 
Without a tag name to reference, we can't make an attempt to generate the manifest raise ManifestUnknown() - repo = model.repository.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', repo) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.json_data, 200) - response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE - response.headers['Docker-Content-Digest'] = manifest.digest - return response + return Response(manifest.json, status=200, headers={'Content-Type': manifest.media_type, + 'Docker-Content-Digest': manifest.digest}) def _reject_manifest2_schema2(func): @wraps(func) def wrapped(*args, **kwargs): - if request.content_type in MANIFEST2_SCHEMA2_CONTENT_TYPES: + if request.content_type in DOCKER_SCHEMA2_CONTENT_TYPES: raise ManifestInvalid(detail={'message': 'manifest schema version not supported'}, http_status_code=415) return func(*args, **kwargs) @@ -310,14 +98,14 @@ def _reject_manifest2_schema2(func): @anon_protect def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): try: - manifest = SignedManifest(request.data) - except ValueError: - raise ManifestInvalid(detail={'message': 'could not parse manifest'}) + manifest = DockerSchema1Manifest(request.data) + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) if manifest.tag != manifest_ref: raise TagInvalid() - return _write_manifest(namespace_name, repo_name, manifest) + return _write_manifest_and_log(namespace_name, repo_name, manifest) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT']) @@ -328,37 +116,20 @@ def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @anon_protect def write_manifest_by_digest(namespace_name, repo_name, manifest_ref): try: - manifest = SignedManifest(request.data) - except ValueError: - raise ManifestInvalid(detail={'message': 'could not parse manifest'}) + manifest = DockerSchema1Manifest(request.data) + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) if manifest.digest != manifest_ref: raise ManifestInvalid(detail={'message': 'manifest digest mismatch'}) - return _write_manifest(namespace_name, repo_name, manifest) + return _write_manifest_and_log(namespace_name, repo_name, manifest) -def _updated_v1_metadata(v1_metadata_json, updated_id_map): - parsed = json.loads(v1_metadata_json) - parsed['id'] = updated_id_map[parsed['id']] - - if parsed.get('parent') and parsed['parent'] in updated_id_map: - parsed['parent'] = updated_id_map[parsed['parent']] - - if parsed.get('container_config', {}).get('Image'): - existing_image = parsed['container_config']['Image'] - if existing_image in updated_id_map: - parsed['container_config']['image'] = updated_id_map[existing_image] - - return json.dumps(parsed) - - -def _write_manifest_itself(namespace_name, repo_name, manifest): - # Ensure that the manifest is for this repository. If the manifest's namespace is empty, then - # it is for the library namespace and we need an extra check. - if (manifest.namespace == '' and features.LIBRARY_SUPPORT and +def _write_manifest(namespace_name, repo_name, manifest): + if (manifest.namespace == '' and + features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']): - # This is a library manifest. All good. 
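# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch): the namespace check in
# _write_manifest boils down to a small predicate. A manifest may carry an empty
# namespace only when library support is enabled and the push targets the configured
# library namespace; otherwise the namespaces must match exactly.
def _namespace_matches_sketch(manifest_namespace, target_namespace,
                              library_support=True, library_namespace='library'):
  if manifest_namespace == '' and library_support and target_namespace == library_namespace:
    return True
  return manifest_namespace == target_namespace

assert _namespace_matches_sketch('', 'library') is True
assert _namespace_matches_sketch('acme', 'acme') is True
assert _namespace_matches_sketch('acme', 'other') is False    # NameInvalid in the real code
# 'library' is only an assumed default here; the real value comes from
# app.config['LIBRARY_NAMESPACE'], and features.LIBRARY_SUPPORT gates the first branch.
# ---------------------------------------------------------------------------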
pass elif manifest.namespace != namespace_name: raise NameInvalid() @@ -367,136 +138,80 @@ def _write_manifest_itself(namespace_name, repo_name, manifest): raise NameInvalid() # Ensure that the repository exists. - repo = model.repository.get_repository(namespace_name, repo_name) + repo = model.get_repository(namespace_name, repo_name) if repo is None: raise NameInvalid() - # Lookup all the images and their parent images (if any) inside the manifest. This will let us - # know which V1 images we need to synthesize and which ones are invalid. - layers = list(manifest.layers) - - docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers} - parent_image_ids = {mdata.v1_metadata.parent for mdata in layers - if mdata.v1_metadata.parent} - all_image_ids = list(docker_image_ids | parent_image_ids) - - images_query = model.image.lookup_repository_images(repo, all_image_ids) - images_map = {image.docker_image_id: image for image in images_query} - - # Lookup the storages associated with each blob in the manifest. - checksums = list({str(mdata.digest) for mdata in manifest.layers}) - storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums) - storage_map = {storage.content_checksum: storage for storage in storage_query} - - # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to - # a storage with a content checksum different from the existing, then we need to rewrite - # the Docker ID to ensure consistency. - tag_name = manifest.tag - has_rewritten_ids = False - updated_id_map = {} - - # Synthesized image id hash. Can be used to pull a "content addressable" image id out of thin air. - digest_history = hashlib.sha256() - - for mdata in layers: - digest_str = str(mdata.digest) - v1_mdata = mdata.v1_metadata - working_docker_id = v1_mdata.docker_id - - # Update our digest_history hash for the new layer data. - digest_history.update(digest_str) - digest_history.update("@") - digest_history.update(mdata.v1_metadata_str.encode('utf-8')) - digest_history.update("|") - - # Ensure that all blobs exist. - blob_storage = storage_map.get(digest_str) - if blob_storage is None: - raise BlobUnknown(detail={'digest': digest_str}) - - # Ensure that the V1 image's storage matches the V2 blob. If not, we've found - # a data inconsistency and need to create a new layer ID for the V1 image, and all images - # that follow it in the ancestry chain. - if ((v1_mdata.docker_id in images_map and - images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or - has_rewritten_ids): - - working_docker_id = digest_history.hexdigest() - logger.warning('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name, - v1_mdata.docker_id, working_docker_id) - has_rewritten_ids = True - - # Store the new docker id in the map - updated_id_map[v1_mdata.docker_id] = working_docker_id - - # Lookup the parent image for the layer, if any. - parent_image = None - if v1_mdata.parent is not None: - parent_image = images_map.get(v1_mdata.parent) - if parent_image is None: - msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent) - raise ManifestInvalid(detail={'message': msg}) - - # Synthesize and store the v1 metadata in the db. 
- v1_metadata_json = mdata.v1_metadata_str - if has_rewritten_ids: - v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map) - - image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id, - v1_mdata.created, v1_mdata.comment, v1_mdata.command, - v1_metadata_json, parent_image) - images_map[v1_mdata.docker_id] = image - - if not layers: - # The manifest doesn't actually reference any layers! + if not manifest.layers: raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) - # Store the manifest pointing to the tag. - manifest_digest = manifest.digest - leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id + # Ensure all the blobs in the manifest exist. + storage_map = model.lookup_blobs_by_digest(namespace_name, repo_name, manifest.checksums) + for layer in manifest.layers: + digest_str = str(layer.digest) + if digest_str not in storage_map: + raise BlobUnknown(detail={'digest': digest_str}) + # Lookup all the images and their parent images (if any) inside the manifest. + # This will let us know which v1 images we need to synthesize and which ones are invalid. + all_image_ids = list(manifest.parent_image_ids | manifest.image_ids) + images_map = model.get_docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) + + # Rewrite any v1 image IDs that do not match the checksum in the database. try: - tag_manifest, manifest_created = model.tag.store_tag_manifest(namespace_name, repo_name, - tag_name, leaf_layer_id, - manifest_digest, manifest.bytes) - except TagAlreadyCreatedException: - logger.warning('Tag %s was already created under repository %s/%s pointing to image %s', - tag_name, namespace_name, repo_name, leaf_layer_id) - raise TagAlreadyExists() + rewritten_images = list(manifest.rewrite_invalid_image_ids(images_map)) + for rewritten_image in rewritten_images: + model.synthesize_v1_image( + repo, + storage_map[rewritten_image.content_checksum], + rewritten_image.image_id, + rewritten_image.created, + rewritten_image.comment, + rewritten_image.command, + rewritten_image.compat_json, + rewritten_image.parent_image_id, + ) + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) - if manifest_created: - for key, value in layers[-1].v1_metadata.labels.iteritems(): - model.label.create_manifest_label(tag_manifest, key, value, 'manifest') + # Store the manifest pointing to the tag. + leaf_layer_id = rewritten_images[-1].image_id + newly_created = model.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id, + manifest.digest, manifest.bytes) + if newly_created: + labels = [] + for key, value in manifest.layers[-1].v1_metadata.labels.iteritems(): + media_type = 'application/json' if is_json(value) else 'text/plain' + labels.append(Label(key=key, value=value, source_type='manifest', media_type=media_type)) + model.create_manifest_labels(namespace_name, repo_name, manifest.digest, labels) + + return repo, storage_map + + +def _write_manifest_and_log(namespace_name, repo_name, manifest): + repo, storage_map = _write_manifest(namespace_name, repo_name, manifest) # Queue all blob manifests for replication. # TODO(jschorr): Find a way to optimize this insertion. 
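# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch): the label handling a few
# lines above records each v1 label with a media type that depends on whether its
# value parses as JSON. A stand-in for the is_json helper imported from
# util.validation could look like this:
import json

def _is_json_sketch(value):
  try:
    json.loads(value)
    return True
  except (ValueError, TypeError):
    return False

assert ('application/json' if _is_json_sketch('{"a": 1}') else 'text/plain') == 'application/json'
assert ('application/json' if _is_json_sketch('plain text') else 'text/plain') == 'text/plain'
# The same classification backs _determine_media_type further down in this file.
# ---------------------------------------------------------------------------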
if features.STORAGE_REPLICATION: - for mdata in layers: - digest_str = str(mdata.digest) - blob_storage = storage_map.get(digest_str) - queue_storage_replication(namespace_name, blob_storage) + for layer in manifest.layers: + digest_str = str(layer.digest) + queue_storage_replication(namespace_name, storage_map[digest_str]) - return (repo, tag_name, manifest_digest) - - -def _write_manifest(namespace_name, repo_name, manifest): - (repo, tag_name, manifest_digest) = _write_manifest_itself(namespace_name, repo_name, manifest) - - # Spawn the repo_push event. - event_data = { - 'updated_tags': [tag_name], - } - - track_and_log('push_repo', repo, tag=tag_name) - spawn_notification(repo, 'repo_push', event_data) + track_and_log('push_repo', repo, tag=manifest.tag) + spawn_notification(repo, 'repo_push', {'updated_tags': [manifest.tag]}) metric_queue.repository_push.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response('OK', 202) - response.headers['Docker-Content-Digest'] = manifest_digest - response.headers['Location'] = url_for('v2.fetch_manifest_by_digest', - repository='%s/%s' % (namespace_name, repo_name), - manifest_ref=manifest_digest) - return response + return Response( + 'OK', + status=202, + headers={ + 'Docker-Content-Digest': manifest.digest, + 'Location': url_for('v2.fetch_manifest_by_digest', + repository='%s/%s' % (namespace_name, repo_name), + manifest_ref=manifest.digest), + }, + ) @v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE']) @@ -505,32 +220,27 @@ def _write_manifest(namespace_name, repo_name, manifest): @require_repo_write @anon_protect def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): - """ Delete the manifest specified by the digest. Note: there is no equivalent - method for deleting by tag name because it is forbidden by the spec. """ - try: - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: - # Without a tag name to reference, we can't make an attempt to generate the manifest + Delete the manifest specified by the digest. + + Note: there is no equivalent method for deleting by tag name because it is + forbidden by the spec. + """ + tags = model.delete_manifest_by_digest(namespace_name, repo_name, manifest_ref) + if not tags: raise ManifestUnknown() - # Mark the tag as no longer alive. - try: - model.tag.delete_tag(namespace_name, repo_name, manifest.tag.name) - except model.DataModelException: - # Tag is not alive. - raise ManifestUnknown() + for tag in tags: + track_and_log('delete_tag', tag.repository, tag=tag.name, digest=manifest_ref) - track_and_log('delete_tag', manifest.tag.repository, - tag=manifest.tag.name, digest=manifest_ref) - - return make_response('', 202) + return Response(status=202) def _generate_and_store_manifest(namespace_name, repo_name, tag_name): - # First look up the tag object and its ancestors - image = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) - parents = model.image.get_parent_images(namespace_name, repo_name, image) + # Find the v1 metadata for this image and its parents. + v1_metadata = model.get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) + parents_v1_metadata = model.get_parents_docker_v1_metadata(namespace_name, repo_name, + v1_metadata.image_id) # If the manifest is being generated under the library namespace, then we make its namespace # empty. 
@@ -539,26 +249,21 @@ def _generate_and_store_manifest(namespace_name, repo_name, tag_name): manifest_namespace = '' # Create and populate the manifest builder - builder = SignedManifestBuilder(manifest_namespace, repo_name, tag_name) + builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name) # Add the leaf layer - builder.add_layer(image.storage.content_checksum, image.v1_json_metadata) + builder.add_layer(v1_metadata.content_checksum, v1_metadata.compat_json) - for parent in parents: - builder.add_layer(parent.storage.content_checksum, parent.v1_json_metadata) + for parent_v1_metadata in parents_v1_metadata: + builder.add_layer(parent_v1_metadata.content_checksum, parent_v1_metadata.compat_json) # Sign the manifest with our signing key. manifest = builder.build(docker_v2_signing_key) - # Write the manifest to the DB. If an existing manifest already exists, return the - # one found. - try: - return model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, - manifest.digest, manifest.bytes) - except IntegrityError as ie: - logger.debug('Got integrity error: %s', ie) - try: - return model.tag.load_tag_manifest(namespace_name, repo_name, tag_name) - except model.InvalidManifestException: - logger.exception('Exception when generating manifest') - raise model.DataModelException('Could not load or generate manifest') + # Write the manifest to the DB. + model.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest.digest, + manifest.bytes) + return manifest + +def _determine_media_type(value): + media_type_name = 'application/json' if is_json(value) else 'text/plain' diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py index 44e87c7c0..6b1ce20ad 100644 --- a/endpoints/v2/tag.py +++ b/endpoints/v2/tag.py @@ -1,33 +1,28 @@ -from flask import jsonify, url_for +from flask import jsonify from auth.registry_jwt_auth import process_registry_jwt_auth from endpoints.common import parse_repository_name -from endpoints.v2 import v2_bp, require_repo_read +from endpoints.v2 import v2_bp, require_repo_read, paginate from endpoints.v2.errors import NameUnknown -from endpoints.v2.v2util import add_pagination from endpoints.decorators import anon_protect -from data import model +from data.interfaces.v2 import pre_oci_model as model @v2_bp.route('//tags/list', methods=['GET']) @parse_repository_name() @process_registry_jwt_auth(scopes=['pull']) @require_repo_read @anon_protect -def list_all_tags(namespace_name, repo_name): - repository = model.repository.get_repository(namespace_name, repo_name) - if repository is None: +@paginate() +def list_all_tags(namespace_name, repo_name, limit, offset, pagination_callback): + repo = model.get_repository(namespace_name, repo_name) + if repo is None: raise NameUnknown() - query = model.tag.list_repository_tags(namespace_name, repo_name) - url = url_for('v2.list_all_tags', repository='%s/%s' % (namespace_name, repo_name)) - link, query = add_pagination(query, url) - + tags = model.repository_tags(namespace_name, repo_name, limit, offset) response = jsonify({ 'name': '{0}/{1}'.format(namespace_name, repo_name), - 'tags': [tag.name for tag in query], + 'tags': [tag.name for tag in tags], }) - if link is not None: - response.headers['Link'] = link - + pagination_callback(len(tags), response) return response diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py index abae60b65..83445fd81 100644 --- a/endpoints/v2/v2auth.py +++ b/endpoints/v2/v2auth.py @@ -1,17 +1,17 @@ import logging import re +from 
cachetools import lru_cache from flask import request, jsonify, abort from app import app, userevents, instance_keys -from data import model from auth.auth import process_auth from auth.auth_context import get_authenticated_user, get_validated_token, get_validated_oauth_token from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermission, CreateRepositoryPermission) -from cachetools import lru_cache from endpoints.v2 import v2_bp from endpoints.decorators import anon_protect +from data.interfaces.v2 import pre_oci_model as model from util.cache import no_cache from util.names import parse_namespace_repository, REPOSITORY_NAME_REGEX from util.security.registry_jwt import generate_bearer_token, build_context_and_subject @@ -21,9 +21,7 @@ logger = logging.getLogger(__name__) TOKEN_VALIDITY_LIFETIME_S = 60 * 60 # 1 hour -SCOPE_REGEX_TEMPLATE = ( - r'^repository:((?:{}\/)?((?:[\.a-zA-Z0-9_\-]+\/)?[\.a-zA-Z0-9_\-]+)):((?:push|pull|\*)(?:,(?:push|pull|\*))*)$' -) +SCOPE_REGEX_TEMPLATE = r'^repository:((?:{}\/)?((?:[\.a-zA-Z0-9_\-]+\/)?[\.a-zA-Z0-9_\-]+)):((?:push|pull|\*)(?:,(?:push|pull|\*))*)$' @lru_cache(maxsize=1) @@ -38,8 +36,9 @@ def get_scope_regex(): @no_cache @anon_protect def generate_registry_jwt(): - """ This endpoint will generate a JWT conforming to the Docker registry v2 auth spec: - https://docs.docker.com/registry/spec/auth/token/ + """ + This endpoint will generate a JWT conforming to the Docker Registry v2 Auth Spec: + https://docs.docker.com/registry/spec/auth/token/ """ audience_param = request.args.get('service') logger.debug('Request audience: %s', audience_param) @@ -97,7 +96,7 @@ def generate_registry_jwt(): if user is not None or token is not None: # Lookup the repository. If it exists, make sure the entity has modify # permission. Otherwise, make sure the entity has create permission. - repo = model.repository.get_repository(namespace, reponame) + repo = model.get_repository(namespace, reponame) if repo: if ModifyRepositoryPermission(namespace, reponame).can(): final_actions.append('push') @@ -106,7 +105,7 @@ def generate_registry_jwt(): else: if CreateRepositoryPermission(namespace).can() and user is not None: logger.debug('Creating repository: %s/%s', namespace, reponame) - model.repository.create_repository(namespace, reponame, user) + model.create_repository(namespace, reponame, user) final_actions.append('push') else: logger.debug('No permission to create repository %s/%s', namespace, reponame) @@ -114,7 +113,7 @@ def generate_registry_jwt(): if 'pull' in actions: # Grant pull if the user can read the repo or it is public. if (ReadRepositoryPermission(namespace, reponame).can() or - model.repository.repository_is_public(namespace, reponame)): + model.repository_is_public(namespace, reponame)): final_actions.append('pull') else: logger.debug('No permission to pull repository %s/%s', namespace, reponame) diff --git a/endpoints/v2/v2util.py b/endpoints/v2/v2util.py deleted file mode 100644 index df4a70fb9..000000000 --- a/endpoints/v2/v2util.py +++ /dev/null @@ -1,42 +0,0 @@ -from flask import request -from app import get_app_url -from util.pagination import encrypt_page_token, decrypt_page_token -import urllib -import logging - -_MAX_RESULTS_PER_PAGE = 50 - -def add_pagination(query, url): - """ Adds optional pagination to the given query by looking for the Docker V2 pagination request - args. 
- """ - try: - requested_limit = int(request.args.get('n', _MAX_RESULTS_PER_PAGE)) - except ValueError: - requested_limit = 0 - - limit = max(min(requested_limit, _MAX_RESULTS_PER_PAGE), 1) - next_page_token = request.args.get('next_page', None) - - # Decrypt the next page token, if any. - offset = 0 - page_info = decrypt_page_token(next_page_token) - if page_info is not None: - # Note: we use offset here instead of ID >= n because one of the V2 queries is a UNION. - offset = page_info.get('offset', 0) - query = query.offset(offset) - - query = query.limit(limit + 1) - url = get_app_url() + url - - results = list(query) - if len(results) <= limit: - return None, results - - # Add a link to the next page of results. - page_info = dict(offset=limit + offset) - next_page_token = encrypt_page_token(page_info) - - link = url + '?' + urllib.urlencode(dict(n=limit, next_page=next_page_token)) - link = link + '; rel="next"' - return link, results[0:-1] diff --git a/endpoints/verbs.py b/endpoints/verbs/__init__.py similarity index 60% rename from endpoints/verbs.py rename to endpoints/verbs/__init__.py index eff2ca35c..fadf63bbc 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs/__init__.py @@ -1,5 +1,4 @@ import logging -import json import hashlib from flask import redirect, Blueprint, abort, send_file, make_response, request @@ -10,39 +9,35 @@ from app import app, signer, storage, metric_queue from auth.auth import process_auth from auth.auth_context import get_authenticated_user from auth.permissions import ReadRepositoryPermission -from data import model, database -from endpoints.trackhelper import track_and_log +from data import database +from data.interfaces.verbs import pre_oci_model as model +from endpoints.common import route_show_if, parse_repository_name from endpoints.decorators import anon_protect +from endpoints.trackhelper import track_and_log +from endpoints.v2.blob import BLOB_DIGEST_ROUTE +from image.appc import AppCImageFormatter +from image.docker.squashed import SquashedDockerImageFormatter +from storage import Storage +from util.registry.filelike import wrap_with_handler from util.registry.queuefile import QueueFile from util.registry.queueprocess import QueueProcess from util.registry.torrent import (make_torrent, per_user_torrent_filename, public_torrent_filename, PieceHasher) -from util.registry.filelike import wrap_with_handler -from formats.squashed import SquashedDockerImage -from formats.aci import ACIImage -from storage import Storage -from endpoints.v2.blob import BLOB_DIGEST_ROUTE -from endpoints.common import route_show_if, parse_repository_name verbs = Blueprint('verbs', __name__) logger = logging.getLogger(__name__) -def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, image_json, repo_image, - handlers): - """ This method generates a stream of data which will be replicated and read from the queue files. - This method runs in a separate process. +def _open_stream(formatter, namespace, repository, tag, derived_image_id, repo_image, handlers): + """ + This method generates a stream of data which will be replicated and read from the queue files. + This method runs in a separate process. """ # For performance reasons, we load the full image list here, cache it, then disconnect from # the database. 
with database.UseThenDisconnect(app.config): - image_list = list(model.image.get_parent_images_with_placements(namespace, repository, - repo_image)) - image_list.insert(0, repo_image) - - def get_image_json(image): - return json.loads(image.v1_json_metadata) + image_list = list(model.get_manifest_layers_with_blobs(repo_image)) def get_next_image(): for current_image in image_list: @@ -51,18 +46,16 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag def get_next_layer(): # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3) store = Storage(app, metric_queue) - for current_image_entry in image_list: - current_image_path = model.storage.get_layer_path(current_image_entry.storage) - current_image_stream = store.stream_read_file(current_image_entry.storage.locations, + for current_image in image_list: + current_image_path = model.get_blob_path(current_image.blob) + current_image_stream = store.stream_read_file(current_image.blob.locations, current_image_path) - current_image_id = current_image_entry.id - logger.debug('Returning image layer %s (%s): %s', current_image_id, - current_image_entry.docker_image_id, current_image_path) + logger.debug('Returning image layer %s: %s', current_image.image_id, current_image_path) yield current_image_stream - stream = formatter.build_stream(namespace, repository, tag, synthetic_image_id, image_json, - get_next_image, get_next_layer, get_image_json) + stream = formatter.build_stream(namespace, repository, tag, repo_image, derived_image_id, + get_next_image, get_next_layer) for handler_fn in handlers: stream = wrap_with_handler(stream, handler_fn) @@ -70,75 +63,58 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag return stream.read -def _sign_synthetic_image(verb, linked_storage_uuid, queue_file): +def _sign_derived_image(verb, derived_image, queue_file): """ Read from the queue file and sign the contents which are generated. This method runs in a separate process. """ signature = None try: signature = signer.detached_sign(queue_file) except: - logger.exception('Exception when signing %s image %s', verb, linked_storage_uuid) + logger.exception('Exception when signing %s deriving image %s', verb, derived_image.ref) return # Setup the database (since this is a new process) and then disconnect immediately # once the operation completes. if not queue_file.raised_exception: with database.UseThenDisconnect(app.config): - try: - derived = model.storage.get_storage_by_uuid(linked_storage_uuid) - except model.storage.InvalidImageException: - return - - signature_entry = model.storage.find_or_create_storage_signature(derived, signer.name) - signature_entry.signature = signature - signature_entry.uploading = False - signature_entry.save() + model.set_derived_image_signature(derived_image, signer.name, signature) -def _write_synthetic_image_to_storage(verb, linked_storage_uuid, linked_locations, queue_file): +def _write_derived_image_to_storage(verb, derived_image, queue_file): """ Read from the generated stream and write it back to the storage engine. This method runs in a separate process. 
""" def handle_exception(ex): - logger.debug('Exception when building %s image %s: %s', verb, linked_storage_uuid, ex) + logger.debug('Exception when building %s derived image %s: %s', verb, derived_image.ref, ex) with database.UseThenDisconnect(app.config): - model.image.delete_derived_storage_by_uuid(linked_storage_uuid) + model.delete_derived_image(derived_image) queue_file.add_exception_handler(handle_exception) # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3) store = Storage(app, metric_queue) - image_path = store.v1_image_layer_path(linked_storage_uuid) - store.stream_write(linked_locations, image_path, queue_file) + image_path = model.get_blob_path(derived_image.blob) + store.stream_write(derived_image.blob.locations, image_path, queue_file) queue_file.close() - if not queue_file.raised_exception: - # Setup the database (since this is a new process) and then disconnect immediately - # once the operation completes. - with database.UseThenDisconnect(app.config): - done_uploading = model.storage.get_storage_by_uuid(linked_storage_uuid) - done_uploading.uploading = False - done_uploading.save() - -def _torrent_for_storage(storage_ref, is_public): - """ Returns a response containing the torrent file contents for the given storage. May abort +def _torrent_for_blob(blob, is_public): + """ Returns a response containing the torrent file contents for the given blob. May abort with an error if the state is not valid (e.g. non-public, non-user request). """ # Make sure the storage has a size. - if not storage_ref.image_size: + if not blob.size: abort(404) # Lookup the torrent information for the storage. - try: - torrent_info = model.storage.get_torrent_info(storage_ref) - except model.TorrentInfoDoesNotExist: + torrent_info = model.get_torrent_info(blob) + if torrent_info is None: abort(404) # Lookup the webseed path for the storage. - path = model.storage.get_layer_path(storage_ref) - webseed = storage.get_direct_download_url(storage_ref.locations, path, + path = model.get_blob_path(blob) + webseed = storage.get_direct_download_url(blob.locations, path, expires_in=app.config['BITTORRENT_WEBSEED_LIFETIME']) if webseed is None: # We cannot support webseeds for storages that cannot provide direct downloads. @@ -146,17 +122,17 @@ def _torrent_for_storage(storage_ref, is_public): # Build the filename for the torrent. if is_public: - name = public_torrent_filename(storage_ref.uuid) + name = public_torrent_filename(blob.uuid) else: user = get_authenticated_user() if not user: abort(403) - name = per_user_torrent_filename(user.uuid, storage_ref.uuid) + name = per_user_torrent_filename(user.uuid, blob.uuid) # Return the torrent file. - torrent_file = make_torrent(name, webseed, storage_ref.image_size, - torrent_info.piece_length, torrent_info.pieces) + torrent_file = make_torrent(name, webseed, blob.size, torrent_info.piece_length, + torrent_info.pieces) headers = {'Content-Type': 'application/x-bittorrent', 'Content-Disposition': 'attachment; filename={0}.torrent'.format(name)} @@ -172,60 +148,46 @@ def _torrent_repo_verb(repo_image, tag, verb, **kwargs): # Lookup an *existing* derived storage for the verb. If the verb's image storage doesn't exist, # we cannot create it here, so we 406. 
- derived = model.image.find_derived_storage_for_image(repo_image, verb, - varying_metadata={'tag': tag}) - if not derived: + derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag}) + if derived_image is None: abort(406) # Return the torrent. - public_repo = model.repository.is_repository_public(repo_image.repository) - torrent = _torrent_for_storage(derived, public_repo) + public_repo = model.repository_is_public(repo_image.repository.namespace_name, + repo_image.repository.name) + torrent = _torrent_for_blob(derived_image.blob, public_repo) # Log the action. track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, torrent=True, **kwargs) - return torrent -def _verify_repo_verb(store, namespace, repository, tag, verb, checker=None): +def _verify_repo_verb(_, namespace, repository, tag, verb, checker=None): permission = ReadRepositoryPermission(namespace, repository) - - if not permission.can() and not model.repository.repository_is_public(namespace, repository): + if not permission.can() and not model.repository_is_public(namespace, repository): abort(403) # Lookup the requested tag. - try: - tag_image = model.tag.get_tag_image(namespace, repository, tag) - except model.DataModelException: - abort(404) - - # Lookup the tag's image and storage. - repo_image = model.image.get_repo_image_extended(namespace, repository, tag_image.docker_image_id) - if not repo_image: + tag_image = model.get_tag_image(namespace, repository, tag) + if tag_image is None: abort(404) # If there is a data checker, call it first. - image_json = None - if checker is not None: - image_json = json.loads(repo_image.v1_json_metadata) - - if not checker(image_json): + if not checker(tag_image): logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb) abort(404) - return (repo_image, tag_image, image_json) + return tag_image def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwargs): # Verify that the image exists and that we have access to it. - result = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) - (repo_image, _, _) = result + repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) - # Lookup the derived image storage for the verb. - derived = model.image.find_derived_storage_for_image(repo_image, verb, - varying_metadata={'tag': tag}) - if derived is None or derived.uploading: + # derived_image the derived image storage for the verb. + derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag}) + if derived_image is None or derived_image.blob.uploading: return make_response('', 202) # Check if we have a valid signer configured. @@ -233,18 +195,17 @@ def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwarg abort(404) # Lookup the signature for the verb. - signature_entry = model.storage.lookup_storage_signature(derived, signer.name) - if signature_entry is None: + signature_value = model.get_derived_image_signature(derived_image, signer.name) + if signature_value is None: abort(404) # Return the signature. - return make_response(signature_entry.signature) + return make_response(signature_value) def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=None, **kwargs): # Verify that the image exists and that we have access to it. 
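# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch): the optional `checker`
# passed to _verify_repo_verb is a closure over the requested platform, in the same
# spirit as os_arch_checker further below. A minimal equivalent (the 'amd64'
# architecture default is an assumption made only for this sketch):
def _os_arch_checker_sketch(os_name, arch):
  def checker(image_json):
    return (image_json.get('os', 'linux') == os_name and
            image_json.get('architecture', 'amd64') == arch)
  return checker

_checker = _os_arch_checker_sketch('linux', 'amd64')
assert _checker({'os': 'linux', 'architecture': 'amd64'}) is True
assert _checker({'os': 'windows'}) is False    # the verb handler aborts with a 404 here
# In this patch the checker receives the repo image and reads repo_image.compat_metadata;
# the plain dict above stands in for that JSON.
# ---------------------------------------------------------------------------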
- result = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) - (repo_image, tag_image, image_json) = result + repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker) # Check for torrent. If found, we return a torrent for the repo verb image (if the derived # image already exists). @@ -256,36 +217,30 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, **kwargs) metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb]) - # Lookup/create the derived image storage for the verb and repo image. - derived = model.image.find_or_create_derived_storage(repo_image, verb, + # Lookup/create the derived image for the verb and repo image. + derived_image = model.lookup_or_create_derived_image(repo_image, verb, storage.preferred_locations[0], varying_metadata={'tag': tag}) - - if not derived.uploading: - logger.debug('Derived %s image %s exists in storage', verb, derived.uuid) - derived_layer_path = model.storage.get_layer_path(derived) + if not derived_image.blob.uploading: + logger.debug('Derived %s image %s exists in storage', verb, derived_image.ref) + derived_layer_path = model.get_blob_path(derived_image.blob) is_head_request = request.method == 'HEAD' - download_url = storage.get_direct_download_url(derived.locations, derived_layer_path, + download_url = storage.get_direct_download_url(derived_image.blob.locations, derived_layer_path, head=is_head_request) if download_url: - logger.debug('Redirecting to download URL for derived %s image %s', verb, derived.uuid) + logger.debug('Redirecting to download URL for derived %s image %s', verb, derived_image.ref) return redirect(download_url) # Close the database handle here for this process before we send the long download. database.close_db_filter(None) - logger.debug('Sending cached derived %s image %s', verb, derived.uuid) - return send_file(storage.stream_read_file(derived.locations, derived_layer_path)) + logger.debug('Sending cached derived %s image %s', verb, derived_image.ref) + return send_file(storage.stream_read_file(derived_image.blob.locations, derived_layer_path)) + logger.debug('Building and returning derived %s image %s', verb, derived_image.ref) - logger.debug('Building and returning derived %s image %s', verb, derived.uuid) - - # Load the image's JSON layer. - if not image_json: - image_json = json.loads(repo_image.v1_json_metadata) - - # Calculate a synthetic image ID. - synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest() + # Calculate a derived image ID. + derived_image_id = hashlib.sha256(repo_image.image_id + ':' + verb).hexdigest() def _cleanup(): # Close any existing DB connection once the process has exited. @@ -295,16 +250,14 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= def _store_metadata_and_cleanup(): with database.UseThenDisconnect(app.config): - model.storage.save_torrent_info(derived, app.config['BITTORRENT_PIECE_SIZE'], - hasher.final_piece_hashes()) - derived.image_size = hasher.hashed_bytes - derived.save() + model.set_torrent_info(derived_image.blob, app.config['BITTORRENT_PIECE_SIZE'], + hasher.final_piece_hashes()) + model.set_blob_size(derived_image.blob, hasher.hashed_bytes) # Create a queue process to generate the data. The queue files will read from the process # and send the results to the client and storage. 
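# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this patch): the derived image ID computed
# a few lines above is a stable hash of "<image id>:<verb>", so repeated requests for
# the same tag and verb always resolve to the same derived image. The .encode() call
# is added only so the sketch also runs on Python 3:
import hashlib

def _derived_image_id_sketch(image_id, verb):
  return hashlib.sha256((image_id + ':' + verb).encode('utf-8')).hexdigest()

assert (_derived_image_id_sketch('abc123', 'squash') ==
        _derived_image_id_sketch('abc123', 'squash'))
assert (_derived_image_id_sketch('abc123', 'squash') !=
        _derived_image_id_sketch('abc123', 'aci'))
# ---------------------------------------------------------------------------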
handlers = [hasher.update] - args = (formatter, namespace, repository, tag, synthetic_image_id, image_json, repo_image, - handlers) + args = (formatter, namespace, repository, tag, derived_image_id, repo_image, handlers) queue_process = QueueProcess(_open_stream, 8 * 1024, 10 * 1024 * 1024, # 8K/10M chunk/max args, finished=_store_metadata_and_cleanup) @@ -321,12 +274,12 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= queue_process.run() # Start the storage saving. - storage_args = (verb, derived.uuid, derived.locations, storage_queue_file) - QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args, finished=_cleanup) + storage_args = (verb, derived_image, storage_queue_file) + QueueProcess.run_process(_write_derived_image_to_storage, storage_args, finished=_cleanup) if sign and signer.name: - signing_args = (verb, derived.uuid, signing_queue_file) - QueueProcess.run_process(_sign_synthetic_image, signing_args, finished=_cleanup) + signing_args = (verb, derived_image, signing_queue_file) + QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup) # Close the database handle here for this process before we send the long download. database.close_db_filter(None) @@ -336,7 +289,9 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= def os_arch_checker(os, arch): - def checker(image_json): + def checker(repo_image): + image_json = repo_image.compat_metadata + # Verify the architecture and os. operating_system = image_json.get('os', 'linux') if operating_system != os: @@ -372,7 +327,7 @@ def get_aci_signature(server, namespace, repository, tag, os, arch): @verbs.route('/aci/////aci///', methods=['GET', 'HEAD']) @process_auth def get_aci_image(server, namespace, repository, tag, os, arch): - return _repo_verb(namespace, repository, tag, 'aci', ACIImage(), + return _repo_verb(namespace, repository, tag, 'aci', AppCImageFormatter(), sign=True, checker=os_arch_checker(os, arch), os=os, arch=arch) @@ -380,7 +335,7 @@ def get_aci_image(server, namespace, repository, tag, os, arch): @verbs.route('/squash///', methods=['GET']) @process_auth def get_squashed_tag(namespace, repository, tag): - return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImage()) + return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImageFormatter()) @route_show_if(features.BITTORRENT) @@ -390,7 +345,7 @@ def get_squashed_tag(namespace, repository, tag): @parse_repository_name() def get_tag_torrent(namespace_name, repo_name, digest): permission = ReadRepositoryPermission(namespace_name, repo_name) - public_repo = model.repository.repository_is_public(namespace_name, repo_name) + public_repo = model.repository_is_public(namespace_name, repo_name) if not permission.can() and not public_repo: abort(403) @@ -399,10 +354,9 @@ def get_tag_torrent(namespace_name, repo_name, digest): # We can not generate a private torrent cluster without a user uuid (e.g. 
token auth) abort(403) - try: - blob = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest) - except model.BlobDoesNotExist: + blob = model.get_repo_blob_by_digest(namespace_name, repo_name, digest) + if blob is None: abort(404) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'torrent']) - return _torrent_for_storage(blob, public_repo) + return _torrent_for_blob(blob, public_repo) diff --git a/formats/tarimageformatter.py b/formats/tarimageformatter.py deleted file mode 100644 index 2274af85e..000000000 --- a/formats/tarimageformatter.py +++ /dev/null @@ -1,56 +0,0 @@ -import tarfile -from util.registry.gzipwrap import GzipWrap - -class TarImageFormatter(object): - """ Base class for classes which produce a TAR containing image and layer data. """ - - def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator, get_image_json): - """ Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this - class's implementation. - """ - return GzipWrap(self.stream_generator(namespace, repository, tag, - synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator, - get_image_json)) - - def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator, get_image_json): - raise NotImplementedError - - def tar_file(self, name, contents, mtime=None): - """ Returns the TAR binary representation for a file with the given name and file contents. """ - length = len(contents) - tar_data = self.tar_file_header(name, length, mtime=mtime) - tar_data += contents - tar_data += self.tar_file_padding(length) - return tar_data - - def tar_file_padding(self, length): - """ Returns TAR file padding for file data of the given length. """ - if length % 512 != 0: - return '\0' * (512 - (length % 512)) - - return '' - - def tar_file_header(self, name, file_size, mtime=None): - """ Returns TAR file header data for a file with the given name and size. """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.REGTYPE - info.size = file_size - - if mtime is not None: - info.mtime = mtime - return info.tobuf() - - def tar_folder(self, name, mtime=None): - """ Returns TAR file header data for a folder with the given name. """ - info = tarfile.TarInfo(name=name) - info.type = tarfile.DIRTYPE - - if mtime is not None: - info.mtime = mtime - - # allow the directory to be readable by non-root users - info.mode = 0755 - return info.tobuf() diff --git a/image/__init__.py b/image/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/formats/aci.py b/image/appc/__init__.py similarity index 79% rename from formats/aci.py rename to image/appc/__init__.py index c24f691bd..f3a958636 100644 --- a/formats/aci.py +++ b/image/appc/__init__.py @@ -6,20 +6,21 @@ from uuid import uuid4 from app import app from util.registry.streamlayerformat import StreamLayerMerger -from formats.tarimageformatter import TarImageFormatter +from image.common import TarImageFormatter ACNAME_REGEX = re.compile(r'[^a-z-]+') -class ACIImage(TarImageFormatter): - """ Image formatter which produces an ACI-compatible TAR. +class AppCImageFormatter(TarImageFormatter): + """ + Image formatter which produces an tarball according to the AppC specification. 
""" - def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator, get_image_json): + def stream_generator(self, namespace, repository, tag, repo_image, + synthetic_image_id, get_image_iterator, get_layer_iterator): image_mtime = 0 - created = next(get_image_iterator()).created + created = next(get_image_iterator()).v1_metadata.created if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) @@ -28,7 +29,7 @@ class ACIImage(TarImageFormatter): # rootfs - The root file system # Yield the manifest. - manifest = self._build_manifest(namespace, repository, tag, layer_json, synthetic_image_id) + manifest = self._build_manifest(namespace, repository, tag, repo_image, synthetic_image_id) yield self.tar_file('manifest', manifest, mtime=image_mtime) # Yield the merged layer dtaa. @@ -40,7 +41,9 @@ class ACIImage(TarImageFormatter): @staticmethod def _build_isolators(docker_config): - """ Builds ACI isolator config from the docker config. """ + """ + Builds ACI isolator config from the docker config. + """ def _isolate_memory(memory): return { @@ -107,22 +110,24 @@ class ACIImage(TarImageFormatter): @staticmethod def _build_ports(docker_config): - """ Builds the ports definitions for the ACI. """ + """ + Builds the ports definitions for the ACI. + + Formats: + port/tcp + port/udp + port + """ ports = [] - for docker_port_definition in ACIImage._get_docker_config_value(docker_config, 'Ports', []): - # Formats: - # port/tcp - # port/udp - # port - + for docker_port in AppCImageFormatter._get_docker_config_value(docker_config, 'Ports', []): protocol = 'tcp' port_number = -1 - if '/' in docker_port_definition: - (port_number, protocol) = docker_port_definition.split('/') + if '/' in docker_port: + (port_number, protocol) = docker_port.split('/') else: - port_number = docker_port_definition + port_number = docker_port try: port_number = int(port_number) @@ -149,9 +154,9 @@ class ACIImage(TarImageFormatter): volumes = [] def get_name(docker_volume_path): - return "volume-%s" % ACIImage._ac_name(docker_volume_path) + return "volume-%s" % AppCImageFormatter._ac_name(docker_volume_path) - for docker_volume_path in ACIImage._get_docker_config_value(docker_config, 'Volumes', []): + for docker_volume_path in AppCImageFormatter._get_docker_config_value(docker_config, 'Volumes', []): if not docker_volume_path: continue @@ -163,9 +168,9 @@ class ACIImage(TarImageFormatter): return volumes @staticmethod - def _build_manifest(namespace, repository, tag, docker_layer_data, synthetic_image_id): - """ Builds an ACI manifest from the docker layer data. """ - + def _build_manifest(namespace, repository, tag, repo_image, synthetic_image_id): + """ Builds an ACI manifest of an existing repository image. 
""" + docker_layer_data = repo_image.compat_metadata config = docker_layer_data.get('config', {}) source_url = "%s://%s/%s/%s:%s" % (app.config['PREFERRED_URL_SCHEME'], @@ -219,9 +224,9 @@ class ACIImage(TarImageFormatter): "eventHandlers": [], "workingDirectory": config.get('WorkingDir', '') or '/', "environment": [{"name": key, "value": value} for (key, value) in env_vars], - "isolators": ACIImage._build_isolators(config), - "mountPoints": ACIImage._build_volumes(config), - "ports": ACIImage._build_ports(config), + "isolators": AppCImageFormatter._build_isolators(config), + "mountPoints": AppCImageFormatter._build_volumes(config), + "ports": AppCImageFormatter._build_ports(config), "annotations": [ {"name": "created", "value": docker_layer_data.get('created', '')}, {"name": "homepage", "value": source_url}, diff --git a/image/common.py b/image/common.py new file mode 100644 index 000000000..733c51afc --- /dev/null +++ b/image/common.py @@ -0,0 +1,67 @@ +import tarfile +from util.registry.gzipwrap import GzipWrap + + +class TarImageFormatter(object): + """ + Base class for classes which produce a tar containing image and layer data. + """ + + def build_stream(self, namespace, repository, tag, repo_image, synthetic_image_id, + get_image_iterator, get_layer_iterator): + """ + Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's + implementation. + """ + return GzipWrap(self.stream_generator(namespace, repository, tag, repo_image, + synthetic_image_id, get_image_iterator, + get_layer_iterator)) + + def stream_generator(self, namespace, repository, tag, repo_image, synthetic_image_id, + get_image_iterator, get_layer_iterator): + raise NotImplementedError + + def tar_file(self, name, contents, mtime=None): + """ + Returns the tar binary representation for a file with the given name and file contents. + """ + length = len(contents) + tar_data = self.tar_file_header(name, length, mtime=mtime) + tar_data += contents + tar_data += self.tar_file_padding(length) + return tar_data + + def tar_file_padding(self, length): + """ + Returns tar file padding for file data of the given length. + """ + if length % 512 != 0: + return '\0' * (512 - (length % 512)) + + return '' + + def tar_file_header(self, name, file_size, mtime=None): + """ + Returns tar file header data for a file with the given name and size. + """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.REGTYPE + info.size = file_size + + if mtime is not None: + info.mtime = mtime + return info.tobuf() + + def tar_folder(self, name, mtime=None): + """ + Returns tar file header data for a folder with the given name. + """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.DIRTYPE + + if mtime is not None: + info.mtime = mtime + + # allow the directory to be readable by non-root users + info.mode = 0755 + return info.tobuf() diff --git a/image/docker/__init__.py b/image/docker/__init__.py new file mode 100644 index 000000000..f694dcb12 --- /dev/null +++ b/image/docker/__init__.py @@ -0,0 +1,10 @@ +""" +docker implements pure data transformations according to the many Docker specifications. +""" + +class DockerFormatException(Exception): + pass + + +class ManifestException(DockerFormatException): + pass diff --git a/image/docker/schema1.py b/image/docker/schema1.py new file mode 100644 index 000000000..72b1aa8d2 --- /dev/null +++ b/image/docker/schema1.py @@ -0,0 +1,381 @@ +""" +schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification. 
+ +https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md +""" + +import hashlib +import json +import logging + +from collections import namedtuple, OrderedDict +from datetime import datetime + +from jwkest.jws import SIGNER_ALGS, keyrep +from jwt.utils import base64url_encode, base64url_decode + +from digest import digest_tools +from image.docker import ManifestException +from image.docker.v1 import DockerV1Metadata + + +logger = logging.getLogger(__name__) + + +# Content Types +DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json' +DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws' +DOCKER_SCHEMA1_CONTENT_TYPES = {DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE} + +# Keys for signature-related data +DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures' +DOCKER_SCHEMA1_HEADER_KEY = 'header' +DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature' +DOCKER_SCHEMA1_PROTECTED_KEY = 'protected' +DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength' +DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail' + +# Keys for manifest-related data +DOCKER_SCHEMA1_REPO_NAME_KEY = 'name' +DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag' +DOCKER_SCHEMA1_ARCH_KEY = 'architecture' +DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers' +DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum' +DOCKER_SCHEMA1_HISTORY_KEY = 'history' +DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility' +DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion' + +# Format for time used in the protected payload. +_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ' + +# The algorithm we use to sign the JWS. +_JWS_SIGNING_ALGORITHM = 'RS256' + + +class MalformedSchema1Manifest(ManifestException): + """ + Raised when a manifest fails an assertion that should be true according to the Docker Manifest + v2.1 Specification. + """ + pass + + +class InvalidSchema1Signature(ManifestException): + """ + Raised when there is a failure verifying the signature of a signed Docker 2.1 Manifest. + """ + pass + + +class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata'])): + """ + Represents all of the data about an individual layer in a given Manifest. + This is the union of the fsLayers (digest) and the history entries (v1_compatibility). + """ + + +class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created', + 'comment', 'command', 'labels'])): + """ + Represents the necessary data extracted from the v1 compatibility string in a given layer of a + Manifest. 
+ """ + + +class DockerSchema1Manifest(object): + def __init__(self, manifest_bytes, validate=True): + self._layers = None + self._bytes = manifest_bytes + + try: + self._parsed = json.loads(manifest_bytes) + except ValueError as ve: + raise MalformedSchema1Manifest('malformed manifest data: %s' % ve) + + self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY] + self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY] + + repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY] + repo_name_tuple = repo_name.split('/') + if len(repo_name_tuple) > 1: + self._namespace, self._repo_name = repo_name_tuple + elif len(repo_name_tuple) == 1: + self._namespace = '' + self._repo_name = repo_name_tuple[0] + else: + raise MalformedSchema1Manifest('malformed repository name: %s' % repo_name) + + if validate: + self._validate() + + def _validate(self): + for signature in self._signatures: + bytes_to_verify = '{0}.{1}'.format(signature['protected'], + base64url_encode(self.payload)) + signer = SIGNER_ALGS[signature['header']['alg']] + key = keyrep(signature['header']['jwk']) + gk = key.get_key() + sig = base64url_decode(signature['signature'].encode('utf-8')) + verified = signer.verify(bytes_to_verify, sig, gk) + if not verified: + raise InvalidSchema1Signature() + + @property + def content_type(self): + return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE + + @property + def media_type(self): + return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE + + @property + def signatures(self): + return self._signatures + + @property + def namespace(self): + return self._namespace + + @property + def repo_name(self): + return self._repo_name + + @property + def tag(self): + return self._tag + + @property + def json(self): + return self._bytes + + @property + def bytes(self): + return self._bytes + + @property + def manifest_json(self): + return self._parsed + + @property + def digest(self): + return digest_tools.sha256_digest(self.payload) + + @property + def image_ids(self): + return {mdata.v1_metadata.image_id for mdata in self.layers} + + @property + def parent_image_ids(self): + return {mdata.v1_metadata.parent_image_id for mdata in self.layers + if mdata.v1_metadata.parent_image_id} + + @property + def checksums(self): + return list({str(mdata.digest) for mdata in self.layers}) + + @property + def leaf_layer(self): + return self.layers[-1] + + @property + def layers(self): + if self._layers is None: + self._layers = list(self._generate_layers()) + return self._layers + + def _generate_layers(self): + """ + Returns a generator of objects that have the blobSum and v1Compatibility keys in them, + starting from the base image and working toward the leaf node. 
+ """ + for blob_sum_obj, history_obj in reversed(zip(self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY], + self._parsed[DOCKER_SCHEMA1_HISTORY_KEY])): + + try: + image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY]) + except digest_tools.InvalidDigestException: + raise MalformedSchema1Manifest('could not parse manifest digest: %s' % + blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY]) + + metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY] + + v1_metadata = json.loads(metadata_string) + command_list = v1_metadata.get('container_config', {}).get('Cmd', None) + command = json.dumps(command_list) if command_list else None + + if not 'id' in v1_metadata: + raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON') + + labels = v1_metadata.get('config', {}).get('Labels', {}) or {} + extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'), + v1_metadata.get('created'), v1_metadata.get('comment'), + command, labels) + yield Schema1Layer(image_digest, extracted, metadata_string) + + @property + def payload(self): + protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY]) + parsed_protected = json.loads(base64url_decode(protected)) + signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]] + signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY])) + return signed_content_head + signed_content_tail + + def rewrite_invalid_image_ids(self, images_map): + """ + Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. + + If Docker gives us a layer with a v1 image ID that already points to existing + content, but the checksums don't match, then we need to rewrite the image ID + to something new in order to ensure consistency. + """ + + # Used to synthesize a new "content addressable" image id + digest_history = hashlib.sha256() + has_rewritten_ids = False + updated_id_map = {} + + for layer in self.layers: + digest_str = str(layer.digest) + extracted_v1_metadata = layer.v1_metadata + working_image_id = extracted_v1_metadata.image_id + + # Update our digest_history hash for the new layer data. + digest_history.update(digest_str) + digest_history.update("@") + digest_history.update(layer.raw_v1_metadata.encode('utf-8')) + digest_history.update("|") + + # Ensure that the v1 image's storage matches the V2 blob. If not, we've + # found a data inconsistency and need to create a new layer ID for the V1 + # image, and all images that follow it in the ancestry chain. + digest_mismatch = (extracted_v1_metadata.image_id in images_map and + images_map[extracted_v1_metadata.image_id].content_checksum != digest_str) + if digest_mismatch or has_rewritten_ids: + working_image_id = digest_history.hexdigest() + has_rewritten_ids = True + + # Store the new docker id in the map + updated_id_map[extracted_v1_metadata.image_id] = working_image_id + + # Lookup the parent image for the layer, if any. + parent_image_id = None + if extracted_v1_metadata.parent_image_id is not None: + parent_image = images_map.get(extracted_v1_metadata.parent_image_id, None) + if parent_image is None: + raise MalformedSchema1Manifest('parent not found with image ID: %s' % + extracted_v1_metadata.parent_image_id) + parent_image_id = updated_id_map.get(parent_image.image_id, parent_image.image_id) + + # Synthesize and store the v1 metadata in the db. 
+ v1_metadata_json = layer.raw_v1_metadata + if has_rewritten_ids: + v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map) + + updated_image = DockerV1Metadata( + namespace_name=self.namespace, + repo_name=self.repo_name, + image_id=working_image_id, + created=extracted_v1_metadata.created, + comment=extracted_v1_metadata.comment, + command=extracted_v1_metadata.command, + compat_json=v1_metadata_json, + parent_image_id=parent_image_id, + checksum=None, # TODO: Check if we need this. + content_checksum=digest_str, + ) + + images_map[updated_image.image_id] = updated_image + yield updated_image + + +class DockerSchema1ManifestBuilder(object): + """ + A convenient abstraction around creating new DockerSchema1Manifests. + """ + def __init__(self, namespace_name, repo_name, tag, architecture='amd64'): + repo_name_key = '{0}/{1}'.format(namespace_name, repo_name) + if namespace_name == '': + repo_name_key = repo_name + + self._base_payload = { + DOCKER_SCHEMA1_REPO_TAG_KEY: tag, + DOCKER_SCHEMA1_REPO_NAME_KEY: repo_name_key, + DOCKER_SCHEMA1_ARCH_KEY: architecture, + DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1, + } + + self._fs_layer_digests = [] + self._history = [] + + def add_layer(self, layer_digest, v1_json_metadata): + self._fs_layer_digests.append({ + DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest, + }) + self._history.append({ + DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata, + }) + return self + + + def build(self, json_web_key): + """ + Builds a DockerSchema1Manifest object complete with signature. + """ + payload = OrderedDict(self._base_payload) + payload.update({ + DOCKER_SCHEMA1_HISTORY_KEY: self._history, + DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests, + }) + + payload_str = json.dumps(payload, indent=3) + + split_point = payload_str.rfind('\n}') + + protected_payload = { + 'formatTail': base64url_encode(payload_str[split_point:]), + 'formatLength': split_point, + 'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU), + } + protected = base64url_encode(json.dumps(protected_payload)) + logger.debug('Generated protected block: %s', protected) + + bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str)) + + signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM] + signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) + logger.debug('Generated signature: %s', signature) + + public_members = set(json_web_key.public_members) + public_key = {comp: value for comp, value in json_web_key.to_dict().items() + if comp in public_members} + + signature_block = { + DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM}, + DOCKER_SCHEMA1_SIGNATURE_KEY: signature, + DOCKER_SCHEMA1_PROTECTED_KEY: protected, + } + + logger.debug('Encoded signature block: %s', json.dumps(signature_block)) + + payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]}) + + return DockerSchema1Manifest(json.dumps(payload, indent=3)) + + +def _updated_v1_metadata(v1_metadata_json, updated_id_map): + """ + Updates v1_metadata with new image IDs. 
+ """ + parsed = json.loads(v1_metadata_json) + parsed['id'] = updated_id_map[parsed['id']] + + if parsed.get('parent') and parsed['parent'] in updated_id_map: + parsed['parent'] = updated_id_map[parsed['parent']] + + if parsed.get('container_config', {}).get('Image'): + existing_image = parsed['container_config']['Image'] + if existing_image in updated_id_map: + parsed['container_config']['image'] = updated_id_map[existing_image] + + return json.dumps(parsed) diff --git a/image/docker/schema2.py b/image/docker/schema2.py new file mode 100644 index 000000000..504f5df80 --- /dev/null +++ b/image/docker/schema2.py @@ -0,0 +1,11 @@ +""" +schema2 implements pure data transformations according to the Docker Manifest v2.2 Specification. + +https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md +""" + +# Content Types +DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json' +DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json' +DOCKER_SCHEMA2_CONTENT_TYPES = {DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE} diff --git a/formats/squashed.py b/image/docker/squashed.py similarity index 76% rename from formats/squashed.py rename to image/docker/squashed.py index ba0964339..b0bc10530 100644 --- a/formats/squashed.py +++ b/image/docker/squashed.py @@ -1,36 +1,37 @@ -from app import app -from util.registry.gzipwrap import GZIP_BUFFER_SIZE -from util.registry.streamlayerformat import StreamLayerMerger -from formats.tarimageformatter import TarImageFormatter - import copy import json import math import calendar +from app import app +from image.common import TarImageFormatter +from util.registry.gzipwrap import GZIP_BUFFER_SIZE +from util.registry.streamlayerformat import StreamLayerMerger + + class FileEstimationException(Exception): - """ Exception raised by build_docker_load_stream if the estimated size of the layer TAR - was lower than the actual size. This means the sent TAR header is wrong, and we have - to fail. + """ + Exception raised by build_docker_load_stream if the estimated size of the layer tar was lower + than the actual size. This means the sent tar header is wrong, and we have to fail. """ pass -class SquashedDockerImage(TarImageFormatter): - """ Image formatter which produces a squashed image compatible with the `docker load` - command. +class SquashedDockerImageFormatter(TarImageFormatter): + """ + Image formatter which produces a squashed image compatible with the `docker load` command. """ - # Multiplier against the image size reported by Docker to account for the TAR metadata. + # Multiplier against the image size reported by Docker to account for the tar metadata. # Note: This multiplier was not formally calculated in anyway and should be adjusted overtime # if/when we encounter issues with it. Unfortunately, we cannot make it too large or the Docker - # daemon dies when trying to load the entire TAR into memory. + # daemon dies when trying to load the entire tar into memory. 
SIZE_MULTIPLIER = 1.2 - def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator, get_image_json): + def stream_generator(self, namespace, repository, tag, repo_image, synthetic_image_id, + get_image_iterator, get_layer_iterator): image_mtime = 0 - created = next(get_image_iterator()).created + created = next(get_image_iterator()).v1_metadata.created if created is not None: image_mtime = calendar.timegm(created.utctimetuple()) @@ -39,7 +40,7 @@ class SquashedDockerImage(TarImageFormatter): # repositories - JSON file containing a repo -> tag -> image map # {image ID folder}: # json - The layer JSON - # layer.tar - The TARed contents of the layer + # layer.tar - The tarballed contents of the layer # VERSION - The docker import version: '1.0' layer_merger = StreamLayerMerger(get_layer_iterator) @@ -57,7 +58,7 @@ class SquashedDockerImage(TarImageFormatter): yield self.tar_folder(synthetic_image_id, mtime=image_mtime) # Yield the JSON layer data. - layer_json = SquashedDockerImage._build_layer_json(layer_json, synthetic_image_id) + layer_json = SquashedDockerImageFormatter._build_layer_json(repo_image, synthetic_image_id) yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime) # Yield the VERSION file. @@ -69,11 +70,11 @@ class SquashedDockerImage(TarImageFormatter): # In V1 we have the actual uncompressed size, which is needed for back compat with # older versions of Docker. # In V2, we use the size given in the image JSON. - if image.storage.uncompressed_size: - estimated_file_size += image.storage.uncompressed_size + if image.blob.uncompressed_size: + estimated_file_size += image.blob.uncompressed_size else: - image_json = get_image_json(image) - estimated_file_size += image_json.get('Size', 0) * SquashedDockerImage.SIZE_MULTIPLIER + image_json = image.compat_metadata + estimated_file_size += image_json.get('Size', 0) * SquashedDockerImageFormatter.SIZE_MULTIPLIER # Make sure the estimated file size is an integer number of bytes. estimated_file_size = int(math.ceil(estimated_file_size)) @@ -105,13 +106,14 @@ class SquashedDockerImage(TarImageFormatter): # Yield any file padding to 512 bytes that is necessary. yield self.tar_file_padding(estimated_file_size) - # Last two records are empty in TAR spec. + # Last two records are empty in tar spec. yield '\0' * 512 yield '\0' * 512 @staticmethod - def _build_layer_json(layer_json, synthetic_image_id): + def _build_layer_json(repo_image, synthetic_image_id): + layer_json = repo_image.compat_metadata updated_json = copy.deepcopy(layer_json) updated_json['id'] = synthetic_image_id diff --git a/image/docker/v1.py b/image/docker/v1.py new file mode 100644 index 000000000..b6df9f21a --- /dev/null +++ b/image/docker/v1.py @@ -0,0 +1,16 @@ +""" +v1 implements pure data transformations according to the Docker Image Specification v1.1. + +https://github.com/docker/docker/blob/master/image/spec/v1.1.md +""" + +from collections import namedtuple + +class DockerV1Metadata(namedtuple('DockerV1Metadata', + ['namespace_name', 'repo_name', 'image_id', 'checksum', + 'content_checksum', 'created', 'comment', 'command', + 'parent_image_id', 'compat_json'])): + """ + DockerV1Metadata represents all of the metadata for a given Docker v1 Image. + The original form of the metadata is stored in the compat_json field. + """ diff --git a/pylintrc b/pylintrc index 7cecd0de7..123b4692d 100644 --- a/pylintrc +++ b/pylintrc @@ -9,7 +9,7 @@ # --enable=similarities". 
If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=missing-docstring,invalid-name,too-many-locals +disable=missing-docstring,invalid-name,too-many-locals,too-few-public-methods,too-many-lines [TYPECHECK] diff --git a/requirements-nover.txt b/requirements-nover.txt index 01fce31d3..f1c093b26 100644 --- a/requirements-nover.txt +++ b/requirements-nover.txt @@ -1,65 +1,66 @@ -autobahn==0.9.3-3 -aiowsgi -trollius -flask -py-bcrypt -Flask-Principal -Flask-Login -Flask-Mail -python-dateutil -boto -pymysql==0.6.7 # Remove version when baseimage has Python 2.7.9+ -stripe -gunicorn<19.0 -gevent -mixpanel -beautifulsoup4 -marisa-trie -APScheduler==3.0.5 -xhtml2pdf -redis -hiredis -flask-restful==0.2.12 -jsonschema --e git+https://github.com/NateFerrero/oauth2lib.git#egg=oauth2lib -alembic -sqlalchemy -python-magic -reportlab==2.7 -raven -peewee -python-ldap -pycryptodome -psycopg2 -pyyaml -PyGithub -e git+https://github.com/DevTable/aniso8601-fake.git#egg=aniso8610 -e git+https://github.com/DevTable/anunidecode.git#egg=anunidecode -e git+https://github.com/DevTable/container-cloud-config.git#egg=container-cloud-config +-e git+https://github.com/DevTable/python-etcd.git@sslfix#egg=python-etcd +-e git+https://github.com/NateFerrero/oauth2lib.git#egg=oauth2lib +-e git+https://github.com/coreos/mockldap.git@v0.1.x#egg=mockldap -e git+https://github.com/coreos/py-bitbucket.git#egg=py-bitbucket -e git+https://github.com/coreos/pyapi-gitlab.git@timeout#egg=pyapi-gitlab --e git+https://github.com/coreos/mockldap.git@v0.1.x#egg=mockldap -e git+https://github.com/coreos/resumablehashlib.git#egg=resumablehashlib --e git+https://github.com/DevTable/python-etcd.git@sslfix#egg=python-etcd -gipc -pyOpenSSL -pygpgme -cachetools -mock -psutil -stringscore -python-swiftclient -python-keystoneclient +APScheduler==3.0.5 +Flask-Login +Flask-Mail +Flask-Principal Flask-Testing -pyjwt -toposort -pyjwkest -jsonpath-rw -bintrees -redlock -semantic-version +PyGithub +aiowsgi +alembic +autobahn==0.9.3-3 +beautifulsoup4 bencode +bintrees +boto +cachetools cryptography +flask +flask-restful==0.2.12 +gevent +gipc +gunicorn<19.0 +hiredis httmock +jsonpath-rw +jsonschema +marisa-trie +mixpanel +mock moto +namedlist +peewee +psutil +psycopg2 +py-bcrypt +pyOpenSSL +pycryptodome +pygpgme +pyjwkest +pyjwt +pymysql==0.6.7 # Remove version when baseimage has Python 2.7.9+ +python-dateutil +python-keystoneclient +python-ldap +python-magic +python-swiftclient +pyyaml +raven +redis +redlock +reportlab==2.7 +semantic-version +sqlalchemy +stringscore +stripe +toposort +trollius tzlocal +xhtml2pdf diff --git a/test/registry_tests.py b/test/registry_tests.py index 971476e13..a5c42fb56 100644 --- a/test/registry_tests.py +++ b/test/registry_tests.py @@ -21,8 +21,9 @@ from data import model from endpoints.v1 import v1_bp from endpoints.v2 import v2_bp from endpoints.verbs import verbs -from endpoints.v2.manifest import SignedManifestBuilder from endpoints.api import api_bp +from image.docker.schema1 import DockerSchema1ManifestBuilder + from initdb import wipe_database, initialize_database, populate_database from endpoints.csrf import generate_csrf_token from tempfile import NamedTemporaryFile @@ -425,7 +426,6 @@ class V1RegistryPullMixin(V1RegistryMixin): # Ensure we do (or do not) have a matching image ID. 
tag_image_id = tags_result['latest'] known_ids = [item['id'] for item in images] - self.assertEquals(not munge_shas, tag_image_id in known_ids) # Retrieve the ancestry of the tag image. @@ -545,7 +545,7 @@ class V2RegistryPushMixin(V2RegistryMixin): # Build a fake manifest. tag_name = tag_name or 'latest' - builder = SignedManifestBuilder(namespace, repository, tag_name) + builder = DockerSchema1ManifestBuilder(namespace, repository, tag_name) full_contents = {} for image_data in reversed(images): @@ -1090,6 +1090,20 @@ class RegistryTestsMixin(object): class V1RegistryTests(V1RegistryPullMixin, V1RegistryPushMixin, RegistryTestsMixin, RegistryTestCaseMixin, LiveServerTestCase): """ Tests for V1 registry. """ + def test_users(self): + # Not logged in, should 404. + self.conduct('GET', '/v1/users', expected_code=404) + + # Try some logins. + self.conduct('POST', '/v1/users', json_data={'username': 'freshuser'}, expected_code=400) + resp = self.conduct('POST', '/v1/users', + json_data={'username': 'devtable', 'password': 'password'}, + expected_code=400) + + # Because Docker + self.assertEquals('"Username or email already exists"', resp.text) + + def test_push_reponame_with_slashes(self): # Attempt to add a repository name with slashes. This should fail as we do not support it. images = [{ @@ -1190,7 +1204,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix self.do_auth('devtable', 'password', namespace, repository, scopes=['push', 'pull']) # Build a fake manifest. - builder = SignedManifestBuilder(namespace, repository, tag_name) + builder = DockerSchema1ManifestBuilder(namespace, repository, tag_name) builder.add_layer('sha256:' + hashlib.sha256('invalid').hexdigest(), json.dumps({'id': 'foo'})) manifest = builder.build(_JWK) @@ -1210,7 +1224,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix self.do_auth('devtable', 'password', namespace, repository, scopes=['push', 'pull']) # Build a fake manifest. - builder = SignedManifestBuilder(namespace, repository, tag_name) + builder = DockerSchema1ManifestBuilder(namespace, repository, tag_name) builder.add_layer('sha256:' + hashlib.sha256('invalid').hexdigest(), json.dumps({'id': 'foo'})) manifest = builder.build(_JWK) @@ -1848,7 +1862,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC self.do_push('devtable', 'newrepo', 'devtable', 'password', images=initial_images) initial_image_id = '91081df45b58dc62dd207441785eef2b895f0383fbe601c99a3cf643c79957dc' - # Try to pull the torrent of the squashed image. This should fail with a 404 since the + # Try to pull the torrent of the squashed image. This should fail with a 406 since the # squashed image doesn't yet exist. 
self.conduct('GET', '/c1/squash/devtable/newrepo/latest', auth=('devtable', 'password'), headers=dict(accept='application/x-bittorrent'), diff --git a/test/test_endpoints.py b/test/test_endpoints.py index d8430375b..a139ee575 100644 --- a/test/test_endpoints.py +++ b/test/test_endpoints.py @@ -18,7 +18,7 @@ from jwkest.jwk import RSAKey from app import app from data import model from data.database import ServiceKeyApprovalType -from endpoints import key_server +from endpoints import keyserver from endpoints.api import api, api_bp from endpoints.api.user import Signin from endpoints.web import web as web_bp @@ -28,7 +28,7 @@ from test.helpers import assert_action_logged try: app.register_blueprint(web_bp, url_prefix='') - app.register_blueprint(key_server.key_server, url_prefix='') + app.register_blueprint(keyserver.key_server, url_prefix='') except ValueError: # This blueprint was already registered pass @@ -355,7 +355,7 @@ class KeyServerTestCase(EndpointTestCase): def _get_test_jwt_payload(self): return { 'iss': 'sample_service', - 'aud': key_server.JWT_AUDIENCE, + 'aud': keyserver.JWT_AUDIENCE, 'exp': int(time.time()) + 60, 'iat': int(time.time()), 'nbf': int(time.time()), diff --git a/test/test_manifests.py b/test/test_manifests.py index 03f2ff539..262aa810a 100644 --- a/test/test_manifests.py +++ b/test/test_manifests.py @@ -1,11 +1,11 @@ import unittest -import time import hashlib from app import app, storage, docker_v2_signing_key from initdb import setup_database_for_testing, finished_database_for_testing from data import model, database -from endpoints.v2.manifest import _write_manifest_itself, SignedManifestBuilder +from endpoints.v2.manifest import _write_manifest +from image.docker.schema1 import DockerSchema1ManifestBuilder ADMIN_ACCESS_USER = 'devtable' @@ -69,11 +69,11 @@ class TestManifests(unittest.TestCase): model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, first_blob_sha, location, 0, 0, 0) # Push the first manifest. - first_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG) + first_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG) .add_layer(first_blob_sha, '{"id": "first"}') .build(docker_v2_signing_key)) - _write_manifest_itself(ADMIN_ACCESS_USER, REPO, first_manifest) + _write_manifest(ADMIN_ACCESS_USER, REPO, first_manifest) # Delete all temp tags and perform GC. self._perform_cleanup() @@ -91,12 +91,12 @@ class TestManifests(unittest.TestCase): model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, third_blob_sha, location, 0, 0, 0) # Push the second manifest. - second_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG) + second_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG) .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}') .add_layer(second_blob_sha, '{"id": "first"}') .build(docker_v2_signing_key)) - _write_manifest_itself(ADMIN_ACCESS_USER, REPO, second_manifest) + _write_manifest(ADMIN_ACCESS_USER, REPO, second_manifest) # Delete all temp tags and perform GC. self._perform_cleanup() @@ -120,12 +120,12 @@ class TestManifests(unittest.TestCase): model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, fourth_blob_sha, location, 0, 0, 0) # Push the third manifest. 
- third_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG) + third_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG) .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}') .add_layer(fourth_blob_sha, '{"id": "first"}') # Note the change in BLOB from the second manifest. .build(docker_v2_signing_key)) - _write_manifest_itself(ADMIN_ACCESS_USER, REPO, third_manifest) + _write_manifest(ADMIN_ACCESS_USER, REPO, third_manifest) # Delete all temp tags and perform GC. self._perform_cleanup() diff --git a/util/registry/torrent.py b/util/registry/torrent.py index d81caa162..ec93e1405 100644 --- a/util/registry/torrent.py +++ b/util/registry/torrent.py @@ -1,12 +1,13 @@ -import bencode import hashlib -import jwt -import resumablehashlib import time import urllib from cachetools import lru_cache +import bencode +import jwt +import resumablehashlib + from app import app, instance_keys @@ -14,6 +15,7 @@ ANNOUNCE_URL = app.config['BITTORRENT_ANNOUNCE_URL'] FILENAME_PEPPER = app.config['BITTORRENT_FILENAME_PEPPER'] REGISTRY_TITLE = app.config['REGISTRY_TITLE'] + @lru_cache(maxsize=1) def _load_private_key(private_key_file_path): with open(private_key_file_path) as private_key_file: diff --git a/util/secscan/analyzer.py b/util/secscan/analyzer.py index d178bbff9..3b2fa39fa 100644 --- a/util/secscan/analyzer.py +++ b/util/secscan/analyzer.py @@ -10,6 +10,7 @@ from data.database import Image, ExternalNotificationEvent from data.model.tag import filter_tags_have_repository_event, get_tags_for_image from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base from util.secscan.api import APIRequestFailure +from util.morecollections import AttrDict logger = logging.getLogger(__name__) @@ -132,6 +133,13 @@ class LayerAnalyzer(object): }, } - spawn_notification(tags[0].repository, 'vulnerability_found', event_data) + # TODO(jzelinskie): remove when more endpoints have been converted to using + # interfaces + repository = AttrDict({ + 'namespace_name': tags[0].repository.namespace_user.username, + 'name': tags[0].repository.name, + }) + + spawn_notification(repository, 'vulnerability_found', event_data) return True, set_status diff --git a/util/secscan/notifier.py b/util/secscan/notifier.py index e3e3ce9c4..908e5668a 100644 --- a/util/secscan/notifier.py +++ b/util/secscan/notifier.py @@ -10,6 +10,7 @@ from data.database import (Image, ImageStorage, ExternalNotificationEvent, Repos from endpoints.notificationhelper import spawn_notification from util.secscan import PRIORITY_LEVELS from util.secscan.api import APIRequestFailure +from util.morecollections import AttrDict logger = logging.getLogger(__name__) @@ -101,7 +102,12 @@ def process_notification_data(notification_data): }, } - spawn_notification(repository_map[repository_id], 'vulnerability_found', event_data) + # TODO(jzelinskie): remove when more endpoints have been converted to using interfaces + repository = AttrDict({ + 'namespace_name': repository_map[repository_id].namespace_user.username, + 'name': repository_map[repository_id].name, + }) + spawn_notification(repository, 'vulnerability_found', event_data) return True diff --git a/web.py b/web.py index b33c76383..237829c0f 100644 --- a/web.py +++ b/web.py @@ -7,7 +7,7 @@ from endpoints.api import api_bp from endpoints.bitbuckettrigger import bitbuckettrigger from endpoints.githubtrigger import githubtrigger from endpoints.gitlabtrigger import gitlabtrigger -from endpoints.key_server import key_server +from 
endpoints.keyserver import key_server from endpoints.oauthlogin import oauthlogin from endpoints.realtime import realtime from endpoints.web import web
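# --- Editor's sketch (not part of the change): round-tripping a manifest through the new
# image.docker.schema1 module introduced above. 'signing_key' is assumed to be a jwkest RSA
# key, e.g. the app's docker_v2_signing_key used elsewhere in these tests; the repository
# names mirror the test fixtures.
import hashlib
import json

from image.docker.schema1 import DockerSchema1Manifest, DockerSchema1ManifestBuilder

def _roundtrip_manifest(signing_key):
    layer_digest = 'sha256:' + hashlib.sha256('hello').hexdigest()
    builder = DockerSchema1ManifestBuilder('devtable', 'newrepo', 'latest')
    builder.add_layer(layer_digest, json.dumps({'id': 'someid'}))
    signed = builder.build(signing_key)

    # Re-parsing the serialized bytes verifies the JWS signature block again.
    parsed = DockerSchema1Manifest(signed.bytes)
    return parsed.digest, parsed.image_ids, parsed.checksums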