Fix verbs in manifestlist

All registry_tests now pass
This commit is contained in:
Joseph Schorr 2016-09-01 19:00:11 -04:00 committed by Jimmy Zelinskie
parent 783c9e7a73
commit 3c8b87e086
18 changed files with 517 additions and 247 deletions

View file

@ -957,7 +957,7 @@ class ServiceKey(BaseModel):
rotation_duration = IntegerField(null=True)
approval = ForeignKeyField(ServiceKeyApproval, null=True)
'''
class MediaType(BaseModel):
""" MediaType is an enumeration of the possible formats of various objects in the data model. """
name = CharField(index=True, unique=True)
@ -1122,6 +1122,7 @@ class ManifestLayerScan(BaseModel):
class DerivedImage(BaseModel):
""" DerivedImage represents a Manifest transcoded into an alternative format. """
uuid = CharField(default=uuid_generator, unique=True)
source_manifest = ForeignKeyField(Manifest)
derived_manifest_json = JSONField()
media_type = ForeignKeyField(MediaType)
@ -1177,8 +1178,7 @@ beta_classes = set([ManifestLayerScan, Tag, BlobPlacementLocation, ManifestLayer
BitTorrentPieces, MediaType, Label, ManifestBlob, BlobUploading, Blob,
ManifestLayerDockerV1, BlobPlacementLocationPreference, ManifestListManifest,
Manifest, DerivedImage, BlobPlacement])
is_model = lambda x: (inspect.isclass(x) and
issubclass(x, BaseModel) and
x is not BaseModel and
x not in beta_classes)
'''
is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel
all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)]

View file

@ -1,12 +0,0 @@
from image import Repository
from data import model
def repository_for_repo(repo):
""" Returns a Repository object representing the repo data model instance given. """
return Repository(
id=repo.id,
name=repo.name,
namespace_name=repo.namespace_user.username,
description=repo.description,
is_public=model.repository.is_repository_public(repo)
)

View file

@ -1,8 +1,7 @@
from collections import namedtuple
from app import app, storage as store
from data import model
from data.model import db_transaction
from collections import namedtuple
from util.morecollections import AttrDict
@ -13,19 +12,6 @@ class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'desc
"""
def _repository_for_repo(repo):
"""
Returns a Repository object representing the repo data model instance given.
"""
return Repository(
id=repo.id,
name=repo.name,
namespace_name=repo.namespace_user.username,
description=repo.description,
is_public=model.repository.is_repository_public(repo)
)
class DockerRegistryV1DataInterface(object):
"""
Interface that represents all data store interactions required by a Docker Registry v1.
@ -409,12 +395,23 @@ class PreOCIModel(DockerRegistryV1DataInterface):
def change_user_password(cls, user, new_password):
model.user.change_password(user, new_password)
@classmethod
def _repository_for_repo(cls, repo):
""" Returns a Repository object representing the repo data model instance given. """
return Repository(
id=repo.id,
name=repo.name,
namespace_name=repo.namespace_user.username,
description=repo.description,
is_public=model.repository.is_repository_public(repo)
)
@classmethod
def get_repository(cls, namespace_name, repo_name):
repo = model.repository.get_repository(namespace_name, repo_name)
if repo is None:
return None
return _repository_for_repo(repo)
return cls._repository_for_repo(repo)
@classmethod
def create_repository(cls, namespace_name, repo_name, user=None):
@ -432,4 +429,4 @@ class PreOCIModel(DockerRegistryV1DataInterface):
def get_sorted_matching_repositories(cls, search_term, only_public, can_read, limit):
repos = model.repository.get_sorted_matching_repositories(search_term, only_public, can_read,
limit=limit)
return [_repository_for_repo(repo) for repo in repos]
return [cls._repository_for_repo(repo) for repo in repos]

View file

@ -7,10 +7,15 @@ from data import model, database
from data.model import DataModelException
from image.docker.v1 import DockerV1Metadata
_MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws"
class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description',
'is_public'])):
"""
Repository represents a namespaced collection of tags.
"""
class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])):
"""
ManifestJSON represents a Manifest of any format.
@ -44,47 +49,6 @@ class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'name
"""
class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name', 'description',
'is_public'])):
"""
Repository represents a namespaced collection of tags.
"""
def _repository_for_repo(repo):
"""
Returns a Repository object representing the repo data model instance given.
"""
return Repository(
id=repo.id,
name=repo.name,
namespace_name=repo.namespace_user.username,
description=repo.description,
is_public=model.repository.is_repository_public(repo)
)
def _docker_v1_metadata(namespace_name, repo_name, repo_image):
"""
Returns a DockerV1Metadata object for the given image under the repository with the given
namespace and name. Note that the namespace and name are passed here as an optimization, and are
*not checked* against the image.
"""
return DockerV1Metadata(
namespace_name=namespace_name,
repo_name=repo_name,
image_id=repo_image.docker_image_id,
checksum=repo_image.v1_checksum,
content_checksum=repo_image.storage.content_checksum,
compat_json=repo_image.v1_json_metadata,
created=repo_image.created,
comment=repo_image.comment,
command=repo_image.command,
# TODO: make sure this isn't needed anywhere, as it is expensive to lookup
parent_image_id=None,
)
class DockerRegistryV2DataInterface(object):
"""
Interface that represents all data store interactions required by a Docker Registry v1.
@ -303,12 +267,23 @@ class PreOCIModel(DockerRegistryV2DataInterface):
def repository_is_public(cls, namespace_name, repo_name):
return model.repository.repository_is_public(namespace_name, repo_name)
@classmethod
def _repository_for_repo(cls, repo):
""" Returns a Repository object representing the repo data model instance given. """
return Repository(
id=repo.id,
name=repo.name,
namespace_name=repo.namespace_user.username,
description=repo.description,
is_public=model.repository.is_repository_public(repo)
)
@classmethod
def get_repository(cls, namespace_name, repo_name):
repo = model.repository.get_repository(namespace_name, repo_name)
if repo is None:
return None
return _repository_for_repo(repo)
return cls._repository_for_repo(repo)
@classmethod
def has_active_tag(cls, namespace_name, repo_name, tag_name):
@ -349,11 +324,32 @@ class PreOCIModel(DockerRegistryV2DataInterface):
tags = model.tag.delete_manifest_by_digest(namespace_name, repo_name, digest)
return [_tag_view(tag) for tag in tags]
@classmethod
def _docker_v1_metadata(cls, namespace_name, repo_name, repo_image):
"""
Returns a DockerV1Metadata object for the given image under the repository with the given
namespace and name. Note that the namespace and name are passed here as an optimization, and are
*not checked* against the image.
"""
return DockerV1Metadata(
namespace_name=namespace_name,
repo_name=repo_name,
image_id=repo_image.docker_image_id,
checksum=repo_image.v1_checksum,
content_checksum=repo_image.storage.content_checksum,
compat_json=repo_image.v1_json_metadata,
created=repo_image.created,
comment=repo_image.comment,
command=repo_image.command,
# TODO: make sure this isn't needed anywhere, as it is expensive to lookup
parent_image_id=None,
)
@classmethod
def get_docker_v1_metadata_by_tag(cls, namespace_name, repo_name, tag_name):
try:
repo_img = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True)
return _docker_v1_metadata(namespace_name, repo_name, repo_img)
return cls._docker_v1_metadata(namespace_name, repo_name, repo_img)
except DataModelException:
return None
@ -364,7 +360,7 @@ class PreOCIModel(DockerRegistryV2DataInterface):
return {}
images_query = model.image.lookup_repository_images(repo, docker_image_ids)
return {image.docker_image_id: _docker_v1_metadata(namespace_name, repo_name, image)
return {image.docker_image_id: cls._docker_v1_metadata(namespace_name, repo_name, image)
for image in images_query}
@classmethod
@ -374,7 +370,7 @@ class PreOCIModel(DockerRegistryV2DataInterface):
return []
parents = model.image.get_parent_images(namespace_name, repo_name, repo_image)
return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents]
return [cls._docker_v1_metadata(namespace_name, repo_name, image) for image in parents]
@classmethod
def create_manifest_and_update_tag(cls, namespace_name, repo_name, tag_name, manifest_digest,
@ -406,7 +402,7 @@ class PreOCIModel(DockerRegistryV2DataInterface):
repo_image = model.image.synthesize_v1_image(repo, storage_obj, image_id, created, comment,
command, compat_json, parent_image)
return _docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image)
return cls._docker_v1_metadata(repo.namespace_user.username, repo.name, repo_image)
@classmethod
def save_manifest(cls, namespace_name, repo_name, tag_name, leaf_layer_docker_id, manifest_digest,
@ -434,7 +430,7 @@ class PreOCIModel(DockerRegistryV2DataInterface):
def get_visible_repositories(cls, username, limit, offset):
query = model.repository.get_visible_repositories(username, include_public=(username is None))
query = query.limit(limit).offset(offset)
return [_repository_for_repo(repo) for repo in query]
return [cls._repository_for_repo(repo) for repo in query]
@classmethod
def create_blob_upload(cls, namespace_name, repo_name, upload_uuid, location_name,

View file

@ -1,3 +1,36 @@
from collections import namedtuple
from data import model
from image.docker.v1 import DockerV1Metadata
import json
class DerivedImage(namedtuple('DerivedImage', ['ref', 'blob', 'internal_source_image_db_id'])):
"""
DerivedImage represents a user-facing alias for an image which was derived from another image.
"""
class RepositoryReference(namedtuple('RepositoryReference', ['id', 'name', 'namespace_name'])):
"""
RepositoryReference represents a reference to a Repository, without its full metadata.
"""
class ImageWithBlob(namedtuple('Image', ['image_id', 'blob', 'compat_metadata', 'repository',
'internal_db_id', 'v1_metadata'])):
"""
ImageWithBlob represents a user-facing alias for referencing an image, along with its blob.
"""
class Blob(namedtuple('Blob', ['uuid', 'size', 'uncompressed_size', 'uploading', 'locations'])):
"""
Blob represents an opaque binary blob saved to the storage system.
"""
class TorrentInfo(namedtuple('TorrentInfo', ['piece_length', 'pieces'])):
"""
TorrentInfo represents the torrent piece information associated with a blob.
"""
class VerbsDataInterface(object):
"""
Interface that represents all data store interactions required by the registry's custom HTTP
@ -10,9 +43,280 @@ class VerbsDataInterface(object):
"""
raise NotImplementedError()
@classmethod
def get_manifest_layers_with_blobs(cls, repo_image):
"""
Returns the full set of manifest layers and their associated blobs starting at the given
repository image and working upwards to the root image.
"""
raise NotImplementedError()
@classmethod
def get_blob_path(cls, blob):
"""
Returns the storage path for the given blob.
"""
raise NotImplementedError()
@classmethod
def get_derived_image_signature(cls, derived_image, signer_name):
"""
Returns the signature associated with the derived image and a specific signer or None if none.
"""
raise NotImplementedError()
@classmethod
def set_derived_image_signature(cls, derived_image, signer_name, signature):
"""
Sets the calculated signature for the given derived image and signer to that specified.
"""
raise NotImplementedError()
@classmethod
def delete_derived_image(cls, derived_image):
"""
Deletes a derived image and all of its storage.
"""
raise NotImplementedError()
@classmethod
def set_blob_size(cls, blob, size):
"""
Sets the size field on a blob to the value specified.
"""
raise NotImplementedError()
@classmethod
def get_repo_blob_by_digest(cls, namespace_name, repo_name, digest):
"""
Returns the blob with the given digest under the matching repository or None if none.
"""
raise NotImplementedError()
@classmethod
def get_torrent_info(cls, blob):
"""
Returns the torrent information associated with the given blob or None if none.
"""
raise NotImplementedError()
@classmethod
def set_torrent_info(cls, blob, piece_length, pieces):
"""
Sets the torrent infomation associated with the given blob to that specified.
"""
raise NotImplementedError()
@classmethod
def lookup_derived_image(cls, repo_image, verb, varying_metadata=None):
"""
Looks up the derived image for the given repository image, verb and optional varying metadata
and returns it or None if none.
"""
raise NotImplementedError()
@classmethod
def lookup_or_create_derived_image(cls, repo_image, verb, location, varying_metadata=None):
"""
Looks up the derived image for the given repository image, verb and optional varying metadata
and returns it. If none exists, a new derived image is created.
"""
raise NotImplementedError()
@classmethod
def get_tag_image(cls, namespace_name, repo_name, tag_name):
"""
Returns the image associated with the live tag with the given name under the matching repository
or None if none.
"""
raise NotImplementedError()
class PreOCIModel(VerbsDataInterface):
"""
PreOCIModel implements the data model for the registry's custom HTTP verbs using a database schema
before it was changed to support the OCI specification.
"""
@classmethod
def repository_is_public(cls, namespace_name, repo_name):
return model.repository.repository_is_public(namespace_name, repo_name)
@classmethod
def _docker_v1_metadata(cls, namespace_name, repo_name, repo_image):
"""
Returns a DockerV1Metadata object for the given image under the repository with the given
namespace and name. Note that the namespace and name are passed here as an optimization, and are
*not checked* against the image. Also note that we only fill in the localized data needed by
verbs.
"""
return DockerV1Metadata(
namespace_name=namespace_name,
repo_name=repo_name,
image_id=repo_image.docker_image_id,
checksum=repo_image.v1_checksum,
compat_json=repo_image.v1_json_metadata,
created=repo_image.created,
comment=repo_image.comment,
command=repo_image.command,
# Note: These are not needed in verbs and are expensive to load, so we just skip them.
content_checksum=None,
parent_image_id=None,
)
@classmethod
def get_manifest_layers_with_blobs(cls, repo_image):
repo_image_record = model.image.get_image_by_id(repo_image.repository.namespace_name,
repo_image.repository.name,
repo_image.image_id)
parents = model.image.get_parent_images_with_placements(repo_image.repository.namespace_name,
repo_image.repository.name,
repo_image_record)
yield repo_image
for parent in parents:
metadata = {}
try:
metadata = json.loads(parent.v1_json_metadata)
except ValueError:
pass
yield ImageWithBlob(
image_id=parent.docker_image_id,
blob=cls._blob(parent.storage),
repository=repo_image.repository,
compat_metadata=metadata,
v1_metadata=cls._docker_v1_metadata(repo_image.repository.namespace_name,
repo_image.repository.name, parent),
internal_db_id=parent.id,
)
@classmethod
def get_derived_image_signature(cls, derived_image, signer_name):
storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid)
signature_entry = model.storage.lookup_storage_signature(storage, signer_name)
if signature_entry is None:
return None
return signature_entry.signature
@classmethod
def set_derived_image_signature(cls, derived_image, signer_name, signature):
storage = model.storage.get_storage_by_uuid(derived_image.blob.uuid)
signature_entry = model.storage.find_or_create_storage_signature(storage, signer_name)
signature_entry.signature = signature
signature_entry.uploading = False
signature_entry.save()
@classmethod
def delete_derived_image(cls, derived_image):
model.image.delete_derived_storage_by_uuid(derived_image.blob.uuid)
@classmethod
def set_blob_size(cls, blob, size):
storage_entry = model.storage.get_storage_by_uuid(blob.uuid)
storage_entry.image_size = size
storage_entry.uploading = False
storage_entry.save()
@classmethod
def get_blob_path(cls, blob):
blob_record = model.storage.get_storage_by_uuid(blob.uuid)
return model.storage.get_layer_path(blob_record)
@classmethod
def get_repo_blob_by_digest(cls, namespace_name, repo_name, digest):
try:
blob_record = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest)
except model.BlobDoesNotExist:
return None
return cls._blob(blob_record)
@classmethod
def get_torrent_info(cls, blob):
blob_record = model.storage.get_storage_by_uuid(blob.uuid)
try:
torrent_info = model.storage.get_torrent_info(blob_record)
except model.TorrentInfoDoesNotExist:
return None
return TorrentInfo(
pieces=torrent_info.pieces,
piece_length=torrent_info.piece_length,
)
@classmethod
def set_torrent_info(cls, blob, piece_length, pieces):
blob_record = model.storage.get_storage_by_uuid(blob.uuid)
model.storage.save_torrent_info(blob_record, piece_length, pieces)
@classmethod
def lookup_derived_image(cls, repo_image, verb, varying_metadata=None):
blob_record = model.image.find_derived_storage_for_image(repo_image.internal_db_id, verb,
varying_metadata)
if blob_record is None:
return None
return cls._derived_image(blob_record, repo_image)
@classmethod
def _derived_image(cls, blob_record, repo_image):
return DerivedImage(
ref=repo_image.internal_db_id,
blob=cls._blob(blob_record),
internal_source_image_db_id=repo_image.internal_db_id,
)
@classmethod
def _blob(cls, blob_record):
if hasattr(blob_record, 'locations'):
locations = blob_record.locations
else:
locations = model.storage.get_storage_locations(blob_record.uuid)
return Blob(
uuid=blob_record.uuid,
size=blob_record.image_size,
uncompressed_size=blob_record.uncompressed_size,
uploading=blob_record.uploading,
locations=locations,
)
@classmethod
def lookup_or_create_derived_image(cls, repo_image, verb, location, varying_metadata=None):
blob_record = model.image.find_or_create_derived_storage(repo_image.internal_db_id, verb, location,
varying_metadata)
return cls._derived_image(blob_record, repo_image)
@classmethod
def get_tag_image(cls, namespace_name, repo_name, tag_name):
try:
found = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True)
except model.DataModelException:
return None
metadata = {}
try:
metadata = json.loads(found.v1_json_metadata)
except ValueError:
pass
return ImageWithBlob(
image_id=found.docker_image_id,
blob=cls._blob(found.storage),
repository=RepositoryReference(
namespace_name=namespace_name,
name=repo_name,
id=found.repository_id,
),
compat_metadata=metadata,
v1_metadata=cls._docker_v1_metadata(namespace_name, repo_name, found),
internal_db_id=found.id,
)

View file

@ -513,7 +513,6 @@ def find_or_create_derived_storage(source_image, transformation_name, preferred_
if existing is not None:
return existing
logger.debug('Creating storage dervied from source image: %s', source_image.id)
uniqueness_hash = _get_uniqueness_hash(varying_metadata)
trans = ImageStorageTransformation.get(name=transformation_name)
new_storage = storage.create_v1_storage(preferred_location)

View file

@ -9,7 +9,7 @@ from data.database import db
from auth.auth_context import get_authenticated_user
from endpoints.notificationhelper import spawn_notification
from util.names import escape_tag
from util.morecollections import AttrDict
logger = logging.getLogger(__name__)
@ -72,7 +72,13 @@ def start_build(repository, prepared_build, pull_robot_name=None):
model.log.log_action('build_dockerfile', repository.namespace_user.username,
ip=request.remote_addr, metadata=event_log_metadata, repository=repository)
spawn_notification(repository, 'build_queued', event_log_metadata,
# TODO(jzelinskie): remove when more endpoints have been converted to using interfaces
repo = AttrDict({
'namespace_name': repository.namespace_user.username,
'name': repository.name,
})
spawn_notification(repo, 'build_queued', event_log_metadata,
subpage='build/%s' % build_request.uuid,
pathargs=['build', build_request.uuid])

View file

@ -155,6 +155,10 @@ def put_image_layer(namespace, repository, image_id):
if model.storage_exists(namespace, repository, image_id):
exact_abort(409, 'Image already exists')
v1_metadata = model.docker_v1_metadata(namespace, repository, image_id)
if v1_metadata is None:
abort(404)
logger.debug('Storing layer data')
input_stream = request.stream
@ -182,7 +186,6 @@ def put_image_layer(namespace, repository, image_id):
sr.add_handler(piece_hasher.update)
# Add a handler which computes the checksum.
v1_metadata = model.docker_v1_metadata(namespace, repository, image_id)
h, sum_hndlr = checksums.simple_checksum_handler(v1_metadata.compat_json)
sr.add_handler(sum_hndlr)

View file

@ -104,7 +104,7 @@ def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
if manifest.tag != manifest_ref:
raise TagInvalid()
return _write_manifest(namespace_name, repo_name, manifest)
return _write_manifest_and_log(namespace_name, repo_name, manifest)
@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT'])
@ -113,16 +113,16 @@ def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref):
@process_registry_jwt_auth(scopes=['pull', 'push'])
@require_repo_write
@anon_protect
def write_manifest_by_digest(namespace_name, repo_name, digest):
def write_manifest_by_digest(namespace_name, repo_name, manifest_ref):
try:
manifest = DockerSchema1Manifest(request.data)
except ManifestException as me:
raise ManifestInvalid(detail={'message': me.message})
if manifest.digest != digest:
if manifest.digest != manifest_ref:
raise ManifestInvalid(detail={'message': 'manifest digest mismatch'})
return _write_manifest(namespace_name, repo_name, manifest)
return _write_manifest_and_log(namespace_name, repo_name, manifest)
def _write_manifest(namespace_name, repo_name, manifest):
@ -178,6 +178,12 @@ def _write_manifest(namespace_name, repo_name, manifest):
model.save_manifest(namespace_name, repo_name, manifest.tag, leaf_layer_id, manifest.digest,
manifest.bytes)
return repo, storage_map
def _write_manifest_and_log(namespace_name, repo_name, manifest):
repo, storage_map = _write_manifest(namespace_name, repo_name, manifest)
# Queue all blob manifests for replication.
# TODO(jschorr): Find a way to optimize this insertion.
if features.STORAGE_REPLICATION:

View file

@ -1,5 +1,4 @@
import logging
import json
import hashlib
from flask import redirect, Blueprint, abort, send_file, make_response, request
@ -10,7 +9,8 @@ from app import app, signer, storage, metric_queue
from auth.auth import process_auth
from auth.auth_context import get_authenticated_user
from auth.permissions import ReadRepositoryPermission
from data import model, database
from data import database
from data.interfaces.verbs import PreOCIModel as model
from endpoints.common import route_show_if, parse_repository_name
from endpoints.decorators import anon_protect
from endpoints.trackhelper import track_and_log
@ -29,8 +29,7 @@ verbs = Blueprint('verbs', __name__)
logger = logging.getLogger(__name__)
def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, image_json, repo_image,
handlers):
def _open_stream(formatter, namespace, repository, tag, derived_image_id, repo_image, handlers):
"""
This method generates a stream of data which will be replicated and read from the queue files.
This method runs in a separate process.
@ -38,12 +37,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag
# For performance reasons, we load the full image list here, cache it, then disconnect from
# the database.
with database.UseThenDisconnect(app.config):
image_list = list(model.image.get_parent_images_with_placements(namespace, repository,
repo_image))
image_list.insert(0, repo_image)
def get_image_json(image):
return json.loads(image.v1_json_metadata)
image_list = list(model.get_manifest_layers_with_blobs(repo_image))
def get_next_image():
for current_image in image_list:
@ -52,18 +46,16 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag
def get_next_layer():
# Re-Initialize the storage engine because some may not respond well to forking (e.g. S3)
store = Storage(app, metric_queue)
for current_image_entry in image_list:
current_image_path = model.storage.get_layer_path(current_image_entry.storage)
current_image_stream = store.stream_read_file(current_image_entry.storage.locations,
for current_image in image_list:
current_image_path = model.get_blob_path(current_image.blob)
current_image_stream = store.stream_read_file(current_image.blob.locations,
current_image_path)
current_image_id = current_image_entry.id
logger.debug('Returning image layer %s (%s): %s', current_image_id,
current_image_entry.docker_image_id, current_image_path)
logger.debug('Returning image layer %s: %s', current_image.image_id, current_image_path)
yield current_image_stream
stream = formatter.build_stream(namespace, repository, tag, synthetic_image_id, image_json,
get_next_image, get_next_layer, get_image_json)
stream = formatter.build_stream(namespace, repository, tag, repo_image, derived_image_id,
get_next_image, get_next_layer)
for handler_fn in handlers:
stream = wrap_with_handler(stream, handler_fn)
@ -71,75 +63,58 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag
return stream.read
def _sign_synthetic_image(verb, linked_storage_uuid, queue_file):
def _sign_derived_image(verb, derived_image, queue_file):
""" Read from the queue file and sign the contents which are generated. This method runs in a
separate process. """
signature = None
try:
signature = signer.detached_sign(queue_file)
except:
logger.exception('Exception when signing %s image %s', verb, linked_storage_uuid)
logger.exception('Exception when signing %s deriving image %s', verb, derived_image.ref)
return
# Setup the database (since this is a new process) and then disconnect immediately
# once the operation completes.
if not queue_file.raised_exception:
with database.UseThenDisconnect(app.config):
try:
derived = model.storage.get_storage_by_uuid(linked_storage_uuid)
except model.storage.InvalidImageException:
return
signature_entry = model.storage.find_or_create_storage_signature(derived, signer.name)
signature_entry.signature = signature
signature_entry.uploading = False
signature_entry.save()
model.set_derived_image_signature(derived_image, signer.name, signature)
def _write_synthetic_image_to_storage(verb, linked_storage_uuid, linked_locations, queue_file):
def _write_derived_image_to_storage(verb, derived_image, queue_file):
""" Read from the generated stream and write it back to the storage engine. This method runs in a
separate process.
"""
def handle_exception(ex):
logger.debug('Exception when building %s image %s: %s', verb, linked_storage_uuid, ex)
logger.debug('Exception when building %s derived image %s: %s', verb, derived_image.ref, ex)
with database.UseThenDisconnect(app.config):
model.image.delete_derived_storage_by_uuid(linked_storage_uuid)
model.delete_derived_image(derived_image)
queue_file.add_exception_handler(handle_exception)
# Re-Initialize the storage engine because some may not respond well to forking (e.g. S3)
store = Storage(app, metric_queue)
image_path = store.v1_image_layer_path(linked_storage_uuid)
store.stream_write(linked_locations, image_path, queue_file)
image_path = model.get_blob_path(derived_image.blob)
store.stream_write(derived_image.blob.locations, image_path, queue_file)
queue_file.close()
if not queue_file.raised_exception:
# Setup the database (since this is a new process) and then disconnect immediately
# once the operation completes.
with database.UseThenDisconnect(app.config):
done_uploading = model.storage.get_storage_by_uuid(linked_storage_uuid)
done_uploading.uploading = False
done_uploading.save()
def _torrent_for_storage(storage_ref, is_public):
""" Returns a response containing the torrent file contents for the given storage. May abort
def _torrent_for_blob(blob, is_public):
""" Returns a response containing the torrent file contents for the given blob. May abort
with an error if the state is not valid (e.g. non-public, non-user request).
"""
# Make sure the storage has a size.
if not storage_ref.image_size:
if not blob.size:
abort(404)
# Lookup the torrent information for the storage.
try:
torrent_info = model.storage.get_torrent_info(storage_ref)
except model.TorrentInfoDoesNotExist:
torrent_info = model.get_torrent_info(blob)
if torrent_info is None:
abort(404)
# Lookup the webseed path for the storage.
path = model.storage.get_layer_path(storage_ref)
webseed = storage.get_direct_download_url(storage_ref.locations, path,
path = model.get_blob_path(blob)
webseed = storage.get_direct_download_url(blob.locations, path,
expires_in=app.config['BITTORRENT_WEBSEED_LIFETIME'])
if webseed is None:
# We cannot support webseeds for storages that cannot provide direct downloads.
@ -147,17 +122,17 @@ def _torrent_for_storage(storage_ref, is_public):
# Build the filename for the torrent.
if is_public:
name = public_torrent_filename(storage_ref.uuid)
name = public_torrent_filename(blob.uuid)
else:
user = get_authenticated_user()
if not user:
abort(403)
name = per_user_torrent_filename(user.uuid, storage_ref.uuid)
name = per_user_torrent_filename(user.uuid, blob.uuid)
# Return the torrent file.
torrent_file = make_torrent(name, webseed, storage_ref.image_size,
torrent_info.piece_length, torrent_info.pieces)
torrent_file = make_torrent(name, webseed, blob.size, torrent_info.piece_length,
torrent_info.pieces)
headers = {'Content-Type': 'application/x-bittorrent',
'Content-Disposition': 'attachment; filename={0}.torrent'.format(name)}
@ -173,60 +148,46 @@ def _torrent_repo_verb(repo_image, tag, verb, **kwargs):
# Lookup an *existing* derived storage for the verb. If the verb's image storage doesn't exist,
# we cannot create it here, so we 406.
derived = model.image.find_derived_storage_for_image(repo_image, verb,
varying_metadata={'tag': tag})
if not derived:
derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag})
if derived_image is None:
abort(406)
# Return the torrent.
public_repo = model.repository.is_repository_public(repo_image.repository)
torrent = _torrent_for_storage(derived, public_repo)
public_repo = model.repository_is_public(repo_image.repository.namespace_name,
repo_image.repository.name)
torrent = _torrent_for_blob(derived_image.blob, public_repo)
# Log the action.
track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, torrent=True, **kwargs)
return torrent
def _verify_repo_verb(store, namespace, repository, tag, verb, checker=None):
def _verify_repo_verb(_, namespace, repository, tag, verb, checker=None):
permission = ReadRepositoryPermission(namespace, repository)
if not permission.can() and not model.repository.repository_is_public(namespace, repository):
if not permission.can() and not model.repository_is_public(namespace, repository):
abort(403)
# Lookup the requested tag.
try:
tag_image = model.tag.get_tag_image(namespace, repository, tag)
except model.DataModelException:
abort(404)
# Lookup the tag's image and storage.
repo_image = model.image.get_repo_image_extended(namespace, repository, tag_image.docker_image_id)
if not repo_image:
tag_image = model.get_tag_image(namespace, repository, tag)
if tag_image is None:
abort(404)
# If there is a data checker, call it first.
image_json = None
if checker is not None:
image_json = json.loads(repo_image.v1_json_metadata)
if not checker(image_json):
if not checker(tag_image):
logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb)
abort(404)
return (repo_image, tag_image, image_json)
return tag_image
def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwargs):
# Verify that the image exists and that we have access to it.
result = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)
(repo_image, _, _) = result
repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)
# Lookup the derived image storage for the verb.
derived = model.image.find_derived_storage_for_image(repo_image, verb,
varying_metadata={'tag': tag})
if derived is None or derived.uploading:
# derived_image the derived image storage for the verb.
derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag})
if derived_image is None or derived_image.blob.uploading:
return make_response('', 202)
# Check if we have a valid signer configured.
@ -234,18 +195,17 @@ def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwarg
abort(404)
# Lookup the signature for the verb.
signature_entry = model.storage.lookup_storage_signature(derived, signer.name)
if signature_entry is None:
signature_value = model.get_derived_image_signature(derived_image, signer.name)
if signature_value is None:
abort(404)
# Return the signature.
return make_response(signature_entry.signature)
return make_response(signature_value)
def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=None, **kwargs):
# Verify that the image exists and that we have access to it.
result = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)
(repo_image, tag_image, image_json) = result
repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)
# Check for torrent. If found, we return a torrent for the repo verb image (if the derived
# image already exists).
@ -257,36 +217,30 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, **kwargs)
metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb])
# Lookup/create the derived image storage for the verb and repo image.
derived = model.image.find_or_create_derived_storage(repo_image, verb,
# Lookup/create the derived image for the verb and repo image.
derived_image = model.lookup_or_create_derived_image(repo_image, verb,
storage.preferred_locations[0],
varying_metadata={'tag': tag})
if not derived.uploading:
logger.debug('Derived %s image %s exists in storage', verb, derived.uuid)
derived_layer_path = model.storage.get_layer_path(derived)
if not derived_image.blob.uploading:
logger.debug('Derived %s image %s exists in storage', verb, derived_image.ref)
derived_layer_path = model.get_blob_path(derived_image.blob)
is_head_request = request.method == 'HEAD'
download_url = storage.get_direct_download_url(derived.locations, derived_layer_path,
download_url = storage.get_direct_download_url(derived_image.blob.locations, derived_layer_path,
head=is_head_request)
if download_url:
logger.debug('Redirecting to download URL for derived %s image %s', verb, derived.uuid)
logger.debug('Redirecting to download URL for derived %s image %s', verb, derived_image.ref)
return redirect(download_url)
# Close the database handle here for this process before we send the long download.
database.close_db_filter(None)
logger.debug('Sending cached derived %s image %s', verb, derived.uuid)
return send_file(storage.stream_read_file(derived.locations, derived_layer_path))
logger.debug('Sending cached derived %s image %s', verb, derived_image.ref)
return send_file(storage.stream_read_file(derived_image.blob.locations, derived_layer_path))
logger.debug('Building and returning derived %s image %s', verb, derived_image.ref)
logger.debug('Building and returning derived %s image %s', verb, derived.uuid)
# Load the image's JSON layer.
if not image_json:
image_json = json.loads(repo_image.v1_json_metadata)
# Calculate a synthetic image ID.
synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest()
# Calculate a derived image ID.
derived_image_id = hashlib.sha256(repo_image.image_id + ':' + verb).hexdigest()
def _cleanup():
# Close any existing DB connection once the process has exited.
@ -296,16 +250,14 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
def _store_metadata_and_cleanup():
with database.UseThenDisconnect(app.config):
model.storage.save_torrent_info(derived, app.config['BITTORRENT_PIECE_SIZE'],
model.set_torrent_info(derived_image.blob, app.config['BITTORRENT_PIECE_SIZE'],
hasher.final_piece_hashes())
derived.image_size = hasher.hashed_bytes
derived.save()
model.set_blob_size(derived_image.blob, hasher.hashed_bytes)
# Create a queue process to generate the data. The queue files will read from the process
# and send the results to the client and storage.
handlers = [hasher.update]
args = (formatter, namespace, repository, tag, synthetic_image_id, image_json, repo_image,
handlers)
args = (formatter, namespace, repository, tag, derived_image_id, repo_image, handlers)
queue_process = QueueProcess(_open_stream,
8 * 1024, 10 * 1024 * 1024, # 8K/10M chunk/max
args, finished=_store_metadata_and_cleanup)
@ -322,12 +274,12 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
queue_process.run()
# Start the storage saving.
storage_args = (verb, derived.uuid, derived.locations, storage_queue_file)
QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args, finished=_cleanup)
storage_args = (verb, derived_image, storage_queue_file)
QueueProcess.run_process(_write_derived_image_to_storage, storage_args, finished=_cleanup)
if sign and signer.name:
signing_args = (verb, derived.uuid, signing_queue_file)
QueueProcess.run_process(_sign_synthetic_image, signing_args, finished=_cleanup)
signing_args = (verb, derived_image, signing_queue_file)
QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup)
# Close the database handle here for this process before we send the long download.
database.close_db_filter(None)
@ -337,7 +289,9 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
def os_arch_checker(os, arch):
def checker(image_json):
def checker(repo_image):
image_json = repo_image.compat_metadata
# Verify the architecture and os.
operating_system = image_json.get('os', 'linux')
if operating_system != os:
@ -391,7 +345,7 @@ def get_squashed_tag(namespace, repository, tag):
@parse_repository_name()
def get_tag_torrent(namespace_name, repo_name, digest):
permission = ReadRepositoryPermission(namespace_name, repo_name)
public_repo = model.repository.repository_is_public(namespace_name, repo_name)
public_repo = model.repository_is_public(namespace_name, repo_name)
if not permission.can() and not public_repo:
abort(403)
@ -400,10 +354,9 @@ def get_tag_torrent(namespace_name, repo_name, digest):
# We can not generate a private torrent cluster without a user uuid (e.g. token auth)
abort(403)
try:
blob = model.blob.get_repo_blob_by_digest(namespace_name, repo_name, digest)
except model.BlobDoesNotExist:
blob = model.get_repo_blob_by_digest(namespace_name, repo_name, digest)
if blob is None:
abort(404)
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'torrent'])
return _torrent_for_storage(blob, public_repo)
return _torrent_for_blob(blob, public_repo)

View file

@ -17,10 +17,10 @@ class AppCImageFormatter(TarImageFormatter):
Image formatter which produces an tarball according to the AppC specification.
"""
def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator, get_image_json):
def stream_generator(self, namespace, repository, tag, repo_image,
synthetic_image_id, get_image_iterator, get_layer_iterator):
image_mtime = 0
created = next(get_image_iterator()).created
created = next(get_image_iterator()).v1_metadata.created
if created is not None:
image_mtime = calendar.timegm(created.utctimetuple())
@ -29,7 +29,7 @@ class AppCImageFormatter(TarImageFormatter):
# rootfs - The root file system
# Yield the manifest.
manifest = self._build_manifest(namespace, repository, tag, layer_json, synthetic_image_id)
manifest = self._build_manifest(namespace, repository, tag, repo_image, synthetic_image_id)
yield self.tar_file('manifest', manifest, mtime=image_mtime)
# Yield the merged layer dtaa.
@ -168,9 +168,9 @@ class AppCImageFormatter(TarImageFormatter):
return volumes
@staticmethod
def _build_manifest(namespace, repository, tag, docker_layer_data, synthetic_image_id):
""" Builds an ACI manifest from the docker layer data. """
def _build_manifest(namespace, repository, tag, repo_image, synthetic_image_id):
""" Builds an ACI manifest of an existing repository image. """
docker_layer_data = repo_image.compat_metadata
config = docker_layer_data.get('config', {})
source_url = "%s://%s/%s/%s:%s" % (app.config['PREFERRED_URL_SCHEME'],

View file

@ -7,19 +7,18 @@ class TarImageFormatter(object):
Base class for classes which produce a tar containing image and layer data.
"""
def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator, get_image_json):
def build_stream(self, namespace, repository, tag, repo_image, synthetic_image_id,
get_image_iterator, get_layer_iterator):
"""
Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's
implementation.
"""
return GzipWrap(self.stream_generator(namespace, repository, tag,
synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator,
get_image_json))
return GzipWrap(self.stream_generator(namespace, repository, tag, repo_image,
synthetic_image_id, get_image_iterator,
get_layer_iterator))
def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator, get_image_json):
def stream_generator(self, namespace, repository, tag, repo_image, synthetic_image_id,
get_image_iterator, get_layer_iterator):
raise NotImplementedError
def tar_file(self, name, contents, mtime=None):

View file

@ -88,7 +88,11 @@ class DockerSchema1Manifest(object):
self._layers = None
self._bytes = manifest_bytes
try:
self._parsed = json.loads(manifest_bytes)
except ValueError as ve:
raise MalformedSchema1Manifest('malformed manifest data: %s' % ve)
self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY]
self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY]

View file

@ -28,10 +28,10 @@ class SquashedDockerImageFormatter(TarImageFormatter):
# daemon dies when trying to load the entire tar into memory.
SIZE_MULTIPLIER = 1.2
def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator, get_image_json):
def stream_generator(self, namespace, repository, tag, repo_image, synthetic_image_id,
get_image_iterator, get_layer_iterator):
image_mtime = 0
created = next(get_image_iterator()).created
created = next(get_image_iterator()).v1_metadata.created
if created is not None:
image_mtime = calendar.timegm(created.utctimetuple())
@ -58,7 +58,7 @@ class SquashedDockerImageFormatter(TarImageFormatter):
yield self.tar_folder(synthetic_image_id, mtime=image_mtime)
# Yield the JSON layer data.
layer_json = SquashedDockerImageFormatter._build_layer_json(layer_json, synthetic_image_id)
layer_json = SquashedDockerImageFormatter._build_layer_json(repo_image, synthetic_image_id)
yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime)
# Yield the VERSION file.
@ -70,10 +70,10 @@ class SquashedDockerImageFormatter(TarImageFormatter):
# In V1 we have the actual uncompressed size, which is needed for back compat with
# older versions of Docker.
# In V2, we use the size given in the image JSON.
if image.storage.uncompressed_size:
estimated_file_size += image.storage.uncompressed_size
if image.blob.uncompressed_size:
estimated_file_size += image.blob.uncompressed_size
else:
image_json = get_image_json(image)
image_json = image.compat_metadata
estimated_file_size += image_json.get('Size', 0) * SquashedDockerImageFormatter.SIZE_MULTIPLIER
# Make sure the estimated file size is an integer number of bytes.
@ -112,7 +112,8 @@ class SquashedDockerImageFormatter(TarImageFormatter):
@staticmethod
def _build_layer_json(layer_json, synthetic_image_id):
def _build_layer_json(repo_image, synthetic_image_id):
layer_json = repo_image.compat_metadata
updated_json = copy.deepcopy(layer_json)
updated_json['id'] = synthetic_image_id

View file

@ -1862,7 +1862,7 @@ class SquashingTests(RegistryTestCaseMixin, V1RegistryPushMixin, LiveServerTestC
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=initial_images)
initial_image_id = '91081df45b58dc62dd207441785eef2b895f0383fbe601c99a3cf643c79957dc'
# Try to pull the torrent of the squashed image. This should fail with a 404 since the
# Try to pull the torrent of the squashed image. This should fail with a 406 since the
# squashed image doesn't yet exist.
self.conduct('GET', '/c1/squash/devtable/newrepo/latest', auth=('devtable', 'password'),
headers=dict(accept='application/x-bittorrent'),

View file

@ -1,11 +1,11 @@
import unittest
import time
import hashlib
from app import app, storage, docker_v2_signing_key
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model, database
from endpoints.v2.manifest import _write_manifest_itself, SignedManifestBuilder
from endpoints.v2.manifest import _write_manifest
from image.docker.schema1 import DockerSchema1ManifestBuilder
ADMIN_ACCESS_USER = 'devtable'
@ -69,11 +69,11 @@ class TestManifests(unittest.TestCase):
model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, first_blob_sha, location, 0, 0, 0)
# Push the first manifest.
first_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG)
first_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG)
.add_layer(first_blob_sha, '{"id": "first"}')
.build(docker_v2_signing_key))
_write_manifest_itself(ADMIN_ACCESS_USER, REPO, first_manifest)
_write_manifest(ADMIN_ACCESS_USER, REPO, first_manifest)
# Delete all temp tags and perform GC.
self._perform_cleanup()
@ -91,12 +91,12 @@ class TestManifests(unittest.TestCase):
model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, third_blob_sha, location, 0, 0, 0)
# Push the second manifest.
second_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG)
second_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG)
.add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
.add_layer(second_blob_sha, '{"id": "first"}')
.build(docker_v2_signing_key))
_write_manifest_itself(ADMIN_ACCESS_USER, REPO, second_manifest)
_write_manifest(ADMIN_ACCESS_USER, REPO, second_manifest)
# Delete all temp tags and perform GC.
self._perform_cleanup()
@ -120,12 +120,12 @@ class TestManifests(unittest.TestCase):
model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, fourth_blob_sha, location, 0, 0, 0)
# Push the third manifest.
third_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG)
third_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG)
.add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
.add_layer(fourth_blob_sha, '{"id": "first"}') # Note the change in BLOB from the second manifest.
.build(docker_v2_signing_key))
_write_manifest_itself(ADMIN_ACCESS_USER, REPO, third_manifest)
_write_manifest(ADMIN_ACCESS_USER, REPO, third_manifest)
# Delete all temp tags and perform GC.
self._perform_cleanup()

View file

@ -10,6 +10,7 @@ from data.database import Image, ExternalNotificationEvent
from data.model.tag import filter_tags_have_repository_event, get_tags_for_image
from data.model.image import set_secscan_status, get_image_with_storage_and_parent_base
from util.secscan.api import APIRequestFailure
from util.morecollections import AttrDict
logger = logging.getLogger(__name__)
@ -132,6 +133,13 @@ class LayerAnalyzer(object):
},
}
spawn_notification(tags[0].repository, 'vulnerability_found', event_data)
# TODO(jzelinskie): remove when more endpoints have been converted to using
# interfaces
repository = AttrDict({
'namespace_name': tags[0].repository.namespace_user.username,
'name': tags[0].repository.name,
})
spawn_notification(repository, 'vulnerability_found', event_data)
return True, set_status

View file

@ -10,6 +10,7 @@ from data.database import (Image, ImageStorage, ExternalNotificationEvent, Repos
from endpoints.notificationhelper import spawn_notification
from util.secscan import PRIORITY_LEVELS
from util.secscan.api import APIRequestFailure
from util.morecollections import AttrDict
logger = logging.getLogger(__name__)
@ -101,7 +102,12 @@ def process_notification_data(notification_data):
},
}
spawn_notification(repository_map[repository_id], 'vulnerability_found', event_data)
# TODO(jzelinskie): remove when more endpoints have been converted to using interfaces
repository = AttrDict({
'namespace_name': repository_map[repository_id].namespace_user.username,
'name': repository_map[repository_id].name,
})
spawn_notification(repository, 'vulnerability_found', event_data)
return True