Fix lookup of manifests referenced solely by a manifest list

We need to ensure we can find them if there is an active tag pointing to the parent list.
This commit is contained in:
Joseph Schorr 2018-11-19 23:31:41 +02:00
parent 54904cfd6e
commit e972e4088b
5 changed files with 97 additions and 13 deletions

View file

@ -2,7 +2,7 @@ import logging
from collections import namedtuple from collections import namedtuple
from peewee import IntegrityError from peewee import IntegrityError, JOIN
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild, from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
db_transaction) db_transaction)
@ -32,8 +32,24 @@ def lookup_manifest(repository_id, manifest_digest, allow_dead=False):
.where(Manifest.repository == repository_id) .where(Manifest.repository == repository_id)
.where(Manifest.digest == manifest_digest)) .where(Manifest.digest == manifest_digest))
if not allow_dead: if allow_dead:
query = filter_to_alive_tags(query.join(Tag)).group_by(Manifest.id) try:
return query.get()
except Manifest.DoesNotExist:
return None
# First, try to filter to those manifests referenced directly by an alive tag.
try:
return filter_to_alive_tags(query.join(Tag)).get()
except Manifest.DoesNotExist:
pass
# Otherwise, try to find it as the child of a manifest that has an alive tag.
query = (query
.join(ManifestChild, on=(ManifestChild.child_manifest == Manifest.id))
.join(Tag, on=(Tag.manifest == ManifestChild.manifest)))
query = filter_to_alive_tags(query)
try: try:
return query.get() return query.get()

View file

@ -321,7 +321,7 @@ def filter_to_visible_tags(query):
return query.where(Tag.hidden == False) return query.where(Tag.hidden == False)
def filter_to_alive_tags(query, now_ms=None): def filter_to_alive_tags(query, now_ms=None, model=Tag):
""" Adjusts the specified Tag query to only return those tags alive. If now_ms is specified, """ Adjusts the specified Tag query to only return those tags alive. If now_ms is specified,
the given timestamp (in MS) is used in place of the current timestamp for determining whether the given timestamp (in MS) is used in place of the current timestamp for determining whether
a tag is alive. a tag is alive.
@ -329,7 +329,7 @@ def filter_to_alive_tags(query, now_ms=None):
if now_ms is None: if now_ms is None:
now_ms = get_epoch_timestamp_ms() now_ms = get_epoch_timestamp_ms()
return query.where((Tag.lifetime_end_ms >> None) | (Tag.lifetime_end_ms > now_ms)) return query.where((model.lifetime_end_ms >> None) | (model.lifetime_end_ms > now_ms))
def set_tag_expiration_sec_for_manifest(manifest_id, expiration_seconds): def set_tag_expiration_sec_for_manifest(manifest_id, expiration_seconds):

View file

@ -5,9 +5,10 @@ from playhouse.test_utils import assert_query_count
from app import docker_v2_signing_key, storage from app import docker_v2_signing_key, storage
from digest.digest_tools import sha256_digest from digest.digest_tools import sha256_digest
from data.database import Tag, ManifestBlob, ImageStorageLocation, ManifestChild, get_epoch_timestamp_ms from data.database import (Tag, ManifestBlob, ImageStorageLocation, ManifestChild,
get_epoch_timestamp_ms)
from data.model.oci.manifest import lookup_manifest, get_or_create_manifest from data.model.oci.manifest import lookup_manifest, get_or_create_manifest
from data.model.oci.tag import filter_to_alive_tags, get_tag from data.model.oci.tag import filter_to_alive_tags, get_tag, create_temporary_tag
from data.model.oci.shared import get_legacy_image_for_manifest from data.model.oci.shared import get_legacy_image_for_manifest
from data.model.oci.label import list_manifest_labels from data.model.oci.label import list_manifest_labels
from data.model.repository import get_repository, create_repository from data.model.repository import get_repository, create_repository
@ -47,6 +48,53 @@ def test_lookup_manifest_dead_tag(initialized_db):
dead_tag.manifest) dead_tag.manifest)
def test_lookup_manifest_child_tag(initialized_db):
repository = create_repository('devtable', 'newrepo', None)
# Populate a manifest.
layer_json = json.dumps({
'config': {},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [],
})
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
remote_digest = sha256_digest('something')
builder = DockerSchema2ManifestBuilder()
builder.set_config_digest(config_digest, len(layer_json))
builder.add_layer(remote_digest, 1234, urls=['http://hello/world'])
manifest = builder.build()
assert get_or_create_manifest(repository, manifest, storage)
# Ensure the manifest cannot currently be looked up, as it is not pointed to by an alive tag.
assert lookup_manifest(repository, manifest.digest) is None
assert lookup_manifest(repository, manifest.digest, allow_dead=True) is not None
# Populate a manifest list.
list_builder = DockerSchema2ManifestListBuilder()
list_builder.add_manifest(manifest, 'amd64', 'linux')
manifest_list = list_builder.build()
# Write the manifest list, which should also write the manifests themselves.
created_tuple = get_or_create_manifest(repository, manifest_list, storage)
assert created_tuple is not None
assert lookup_manifest(repository, manifest.digest) is None
assert lookup_manifest(repository, manifest_list.digest) is None
# Point a tag at the manifest list. This should make it and its child manifest visible.
create_temporary_tag(created_tuple.manifest, 1000)
assert lookup_manifest(repository, manifest.digest) is not None
assert lookup_manifest(repository, manifest_list.digest) is not None
def _populate_blob(content): def _populate_blob(content):
digest = str(sha256_digest(content)) digest = str(sha256_digest(content))
location = ImageStorageLocation.get(name='local_us') location = ImageStorageLocation.get(name='local_us')

View file

@ -246,6 +246,10 @@ class DockerSchema2ManifestList(ManifestInterface):
def child_manifests(self, content_retriever): def child_manifests(self, content_retriever):
return self.manifests(content_retriever) return self.manifests(content_retriever)
def child_manifest_digests(self):
return [m[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY]
for m in self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY]]
def get_manifest_labels(self, content_retriever): def get_manifest_labels(self, content_retriever):
return None return None

View file

@ -20,6 +20,7 @@ class V2ProtocolSteps(Enum):
AUTH = 'auth' AUTH = 'auth'
BLOB_HEAD_CHECK = 'blob-head-check' BLOB_HEAD_CHECK = 'blob-head-check'
GET_MANIFEST = 'get-manifest' GET_MANIFEST = 'get-manifest'
GET_MANIFEST_LIST = 'get-manifest-list'
PUT_MANIFEST = 'put-manifest' PUT_MANIFEST = 'put-manifest'
PUT_MANIFEST_LIST = 'put-manifest-list' PUT_MANIFEST_LIST = 'put-manifest-list'
MOUNT_BLOB = 'mount-blob' MOUNT_BLOB = 'mount-blob'
@ -147,7 +148,7 @@ class V2Protocol(RegistryProtocol):
headers = { headers = {
'Authorization': 'Bearer ' + token, 'Authorization': 'Bearer ' + token,
'Accept': DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE, 'Accept': ','.join(DOCKER_SCHEMA2_CONTENT_TYPES),
} }
for tag_name in tag_names: for tag_name in tag_names:
@ -155,18 +156,33 @@ class V2Protocol(RegistryProtocol):
response = self.conduct(session, 'GET', response = self.conduct(session, 'GET',
'/v2/%s/manifests/%s' % (self.repo_name(namespace, repo_name), '/v2/%s/manifests/%s' % (self.repo_name(namespace, repo_name),
tag_name), tag_name),
expected_status=(200, expected_failure, V2ProtocolSteps.GET_MANIFEST), expected_status=(200, expected_failure,
V2ProtocolSteps.GET_MANIFEST_LIST),
headers=headers) headers=headers)
if expected_failure is not None: if expected_failure is not None:
return None return None
# Parse the returned manifest list and ensure it matches. # Parse the returned manifest list and ensure it matches.
assert response.headers['Content-Type'] == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE assert response.headers['Content-Type'] == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
manifest = parse_manifest_from_bytes(response.text, response.headers['Content-Type']) retrieved = parse_manifest_from_bytes(response.text, response.headers['Content-Type'])
assert manifest.schema_version == 2 assert retrieved.schema_version == 2
assert manifest.is_manifest_list assert retrieved.is_manifest_list
assert manifest.digest == manifestlist.digest assert retrieved.digest == manifestlist.digest
# Pull each of the manifests inside and ensure they can be retrieved.
for manifest_digest in retrieved.child_manifest_digests():
response = self.conduct(session, 'GET',
'/v2/%s/manifests/%s' % (self.repo_name(namespace, repo_name),
manifest_digest),
expected_status=(200, expected_failure,
V2ProtocolSteps.GET_MANIFEST),
headers=headers)
if expected_failure is not None:
return None
manifest = parse_manifest_from_bytes(response.text, response.headers['Content-Type'])
assert not manifest.is_manifest_list
assert manifest.digest == manifest_digest
def push_list(self, session, namespace, repo_name, tag_names, manifestlist, manifests, blobs, def push_list(self, session, namespace, repo_name, tag_names, manifestlist, manifests, blobs,
credentials=None, expected_failure=None, options=None): credentials=None, expected_failure=None, options=None):