diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index 9b6674120..03ccfc4cd 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -1,6 +1,8 @@ from enum import Enum, unique from data.registry_model.datatype import datatype, requiresinput +from image.docker.schema1 import DockerSchema1Manifest + class RepositoryReference(datatype('Repository', [])): """ RepositoryReference is a reference to a repository, passed to registry interface methods. """ @@ -68,6 +70,10 @@ class Manifest(datatype('Manifest', ['digest', 'manifest_bytes'])): """ return legacy_image + def get_parsed_manifest(self, validate=True): + """ Returns the parsed manifest for this manifest. """ + return DockerSchema1Manifest(self.manifest_bytes, validate=validate) + class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'comment', 'command', 'image_size', 'aggregate_size', 'uploading'])): diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index 323d83f85..203c3f23b 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -122,3 +122,12 @@ class RegistryDataInterface(object): @abstractmethod def get_security_status(self, manifest_or_legacy_image): """ Returns the security status for the given manifest or legacy image or None if none. """ + + @abstractmethod + def backfill_manifest_for_tag(self, tag): + """ Backfills a manifest for the V1 tag specified. + If a manifest already exists for the tag, returns that manifest. + + NOTE: This method will only be necessary until we've completed the backfill, at which point + it should be removed. + """ diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 3a1a187f0..8096979f3 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -2,11 +2,14 @@ from collections import defaultdict +from peewee import IntegrityError + from data import database from data import model from data.registry_model.interface import RegistryDataInterface from data.registry_model.datatypes import (Tag, RepositoryReference, Manifest, LegacyImage, Label, SecurityScanStatus) +from image.docker.schema1 import DockerSchema1ManifestBuilder class PreOCIModel(RegistryDataInterface): @@ -199,7 +202,13 @@ class PreOCIModel(RegistryDataInterface): model.tag.restore_tag_to_image(repository_ref._db_id, tag_name, manifest_or_legacy_image.docker_image_id) - return self.get_repo_tag(repository_ref, tag_name, include_legacy_image=True) + # Generate a manifest for the tag, if necessary. + tag = self.get_repo_tag(repository_ref, tag_name, include_legacy_image=True) + if tag is None: + return None + + self.backfill_manifest_for_tag(tag) + return tag def delete_tag(self, repository_ref, tag_name): """ @@ -285,5 +294,66 @@ class PreOCIModel(RegistryDataInterface): return SecurityScanStatus.QUEUED + def backfill_manifest_for_tag(self, tag): + """ Backfills a manifest for the V1 tag specified. + If a manifest already exists for the tag, returns that manifest. + + NOTE: This method will only be necessary until we've completed the backfill, at which point + it should be removed. + """ + import features + + from app import app, docker_v2_signing_key + + # Ensure that there isn't already a manifest for the tag. + tag_manifest = model.tag.get_tag_manifest(tag._db_id) + if tag_manifest is not None: + return Manifest.for_tag_manifest(tag_manifest) + + # Create the manifest. + try: + tag_obj = database.RepositoryTag.get(id=tag._db_id) + except database.RepositoryTag.DoesNotExist: + return None + + repo = tag_obj.repository + namespace_name = repo.namespace_user.username + repo_name = repo.name + + # Find the v1 metadata for this image and its parents. + repo_image = tag_obj.image + parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) + + # If the manifest is being generated under the library namespace, then we make its namespace + # empty. + manifest_namespace = namespace_name + if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']: + manifest_namespace = '' + + # Create and populate the manifest builder + builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag.name) + + # Add the leaf layer + builder.add_layer(repo_image.storage.content_checksum, repo_image.v1_json_metadata) + + for parent_image in parents: + builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata) + + # Sign the manifest with our signing key. + manifest = builder.build(docker_v2_signing_key) + + # Write the manifest to the DB. + blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo, + manifest.checksums) + + storage_map = {blob.content_checksum: blob.id for blob in blob_query} + try: + tag_manifest, _ = model.tag.associate_generated_tag_manifest(namespace_name, repo_name, + tag.name, manifest, storage_map) + except IntegrityError: + tag_manifest = model.tag.get_tag_manifest(tag_obj) + + return Manifest.for_tag_manifest(tag_manifest) + pre_oci_model = PreOCIModel() diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py index dfff01bbf..9d1d530a6 100644 --- a/data/registry_model/test/test_pre_oci_model.py +++ b/data/registry_model/test/test_pre_oci_model.py @@ -4,7 +4,11 @@ import pytest from playhouse.test_utils import assert_query_count +from app import docker_v2_signing_key from data import model +from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, + ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, + TagManifestLabel, TagManifest, TagManifestLabel) from data.registry_model.registry_pre_oci_model import PreOCIModel from data.registry_model.datatypes import RepositoryReference @@ -315,3 +319,47 @@ def test_get_security_status(pre_oci_model): for tag in tags: assert pre_oci_model.get_security_status(tag.legacy_image) + + +@pytest.fixture() +def clear_rows(initialized_db): + # Remove all new-style rows so we can backfill. + TagManifestLabelMap.delete().execute() + ManifestLabel.delete().execute() + ManifestBlob.delete().execute() + ManifestLegacyImage.delete().execute() + TagManifestToManifest.delete().execute() + Manifest.delete().execute() + TagManifestLabel.delete().execute() + TagManifest.delete().execute() + + +@pytest.mark.parametrize('repo_namespace, repo_name', [ + ('devtable', 'simple'), + ('devtable', 'complex'), + ('devtable', 'history'), + ('buynlarge', 'orgrepo'), +]) +def test_backfill_manifest_for_tag(repo_namespace, repo_name, clear_rows, pre_oci_model): + repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) + tags = pre_oci_model.list_repository_tags(repository_ref) + assert tags + + for tag in tags: + assert not tag.manifest_digest + assert pre_oci_model.backfill_manifest_for_tag(tag) + + tags = pre_oci_model.list_repository_tags(repository_ref, include_legacy_images=True) + assert tags + for tag in tags: + assert tag.manifest_digest + + manifest = pre_oci_model.get_manifest_for_tag(tag) + assert manifest + + legacy_image = pre_oci_model.get_legacy_image(repository_ref, tag.legacy_image.docker_image_id, + include_parents=True) + + parsed_manifest = manifest.get_parsed_manifest() + assert parsed_manifest.leaf_layer_v1_image_id == legacy_image.docker_image_id + assert parsed_manifest.parent_image_ids == {p.docker_image_id for p in legacy_image.parents} diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index b96210965..36eeea1a3 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -10,7 +10,6 @@ from endpoints.api import (resource, nickname, require_repo_read, require_repo_w parse_args, query_param, truthy_bool, disallow_for_app_repositories) from endpoints.api.image import image_dict from endpoints.exception import NotFound, InvalidRequest -from endpoints.v2.manifest import _generate_and_store_manifest from util.names import TAG_ERROR, TAG_REGEX @@ -155,9 +154,6 @@ class RepositoryTag(RepositoryParamResource): 'original_image': existing_tag.legacy_image.docker_image_id if existing_tag else None, }, repo_name=repository) - # TODO(jschorr): Move this into the retarget_tag call - _generate_and_store_manifest(namespace, repository, tag) - return 'Updated', 201 @require_repo_write @@ -279,10 +275,6 @@ class RestoreTag(RepositoryParamResource): if not registry_model.retarget_tag(repo_ref, tag, manifest_or_legacy_image, is_reversion=True): raise InvalidRequest('Could not restore tag') - if manifest_digest is None: - # TODO(jschorr): Move this into the retarget_tag call - _generate_and_store_manifest(namespace, repository, tag) - log_action('revert_tag', namespace, log_data, repo_name=repository) return { diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 1f0ef9748..93d343db2 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -9,6 +9,7 @@ import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth from digest import digest_tools +from data.registry_model import registry_model from endpoints.decorators import anon_protect, parse_repository_name from endpoints.v2 import v2_bp, require_repo_read, require_repo_write from endpoints.v2.models_interface import Label @@ -52,7 +53,18 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): else: raise ManifestUnknown() - manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref) + repo_ref = registry_model.lookup_repository(namespace_name, repo_name) + if repo_ref is None: + raise ManifestUnknown() + + tag = registry_model.get_repo_tag(repo_ref, manifest_ref, include_legacy_image=True) + if tag is None: + raise ManifestUnknown() + + if not registry_model.backfill_manifest_for_tag(tag): + raise ManifestUnknown() + + manifest = model.get_manifest_by_tag(namespace_name, repo_name, manifest_ref) if manifest is None: raise ManifestUnknown() @@ -259,35 +271,3 @@ def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): track_and_log('delete_tag', tag.repository, tag=tag.name, digest=manifest_ref) return Response(status=202) - - -def _generate_and_store_manifest(namespace_name, repo_name, tag_name): - """ Generates and stores a manifest for an existing V1-only tag. """ - # TODO(jschorr): Remove once we are fully on Manifest-based model. - - # Find the v1 metadata for this image and its parents. - v1_metadata = model.get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) - parents_v1_metadata = model.get_parents_docker_v1_metadata(namespace_name, repo_name, - v1_metadata.image_id) - - # If the manifest is being generated under the library namespace, then we make its namespace - # empty. - manifest_namespace = namespace_name - if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']: - manifest_namespace = '' - - # Create and populate the manifest builder - builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name) - - # Add the leaf layer - builder.add_layer(v1_metadata.content_checksum, v1_metadata.compat_json) - - for parent_v1_metadata in parents_v1_metadata: - builder.add_layer(parent_v1_metadata.content_checksum, parent_v1_metadata.compat_json) - - # Sign the manifest with our signing key. - manifest = builder.build(docker_v2_signing_key) - - # Write the manifest to the DB. - model.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest) - return manifest diff --git a/initdb.py b/initdb.py index 988b52cc6..67c9acbdf 100644 --- a/initdb.py +++ b/initdb.py @@ -23,9 +23,9 @@ from data.database import (db, all_models, Role, TeamRole, Visibility, LoginServ ApprTagKind, ApprBlobPlacementLocation, Repository) from data import model from data.queue import WorkQueue +from data.registry_model import registry_model from app import app, storage as store, tf from storage.basestorage import StoragePaths -from endpoints.v2.manifest import _generate_and_store_manifest from image.docker.schema1 import DOCKER_SCHEMA1_CONTENT_TYPES @@ -133,13 +133,15 @@ def __create_subtree(with_storage, repo, structure, creator_username, parent, ta if not isinstance(last_node_tags, list): last_node_tags = [last_node_tags] + repo_ref = registry_model.lookup_repository(repo.namespace_user.username, repo.name) for tag_name in last_node_tags: new_tag = model.tag.create_or_update_tag(repo.namespace_user.username, repo.name, tag_name, new_image.docker_image_id) derived = model.image.find_or_create_derived_storage(new_tag, 'squash', 'local_us') model.storage.find_or_create_storage_signature(derived, 'gpg2') - _generate_and_store_manifest(repo.namespace_user.username, repo.name, tag_name) + tag = registry_model.get_repo_tag(repo_ref, tag_name) + registry_model.backfill_manifest_for_tag(tag) tag_map[tag_name] = new_tag for tag_name in last_node_tags: diff --git a/test/registry/fixtures.py b/test/registry/fixtures.py index 4f869aab1..d0167053f 100644 --- a/test/registry/fixtures.py +++ b/test/registry/fixtures.py @@ -14,7 +14,8 @@ from flask_principal import Identity from app import storage from data.database import (close_db_filter, configure, DerivedStorageForImage, QueueItem, Image, - TagManifest) + TagManifest, TagManifestToManifest, Manifest, ManifestLegacyImage, + ManifestBlob) from data import model from endpoints.csrf import generate_csrf_token from util.log import logfile_path @@ -107,6 +108,10 @@ def registry_server_executor(app): return 'OK' def delete_manifests(): + ManifestLegacyImage.delete().execute() + ManifestBlob.delete().execute() + Manifest.delete().execute() + TagManifestToManifest.delete().execute() TagManifest.delete().execute() return 'OK'