Change the tags query used for OCI to list tags to be shallow

This removes the join on Manifest which was (possibly) causing the prod issue. We also simplify the lookup for pre-OCI as well, a bit.
This commit is contained in:
Joseph Schorr 2019-03-08 14:07:30 -05:00
parent 530c0705e9
commit 0d6343871e
10 changed files with 118 additions and 58 deletions

View file

@ -44,8 +44,26 @@ def get_tag(repository_id, tag_name):
return None return None
def list_alive_tags(repository_id, start_pagination_id=None, limit=None, sort_tags=False): def lookup_alive_tags_shallow(repository_id, start_pagination_id=None, limit=None):
""" Returns a list of all the tags alive in the specified repository, with optional limits. """ Returns a list of the tags alive in the specified repository. Note that the tags returned
*only* contain their ID and name. Also note that the Tags are returned ordered by ID.
"""
query = (Tag
.select(Tag.id, Tag.name)
.where(Tag.repository == repository_id)
.order_by(Tag.id))
if start_pagination_id is not None:
query = query.where(Tag.id >= start_pagination_id)
if limit is not None:
query = query.limit(limit)
return filter_to_visible_tags(filter_to_alive_tags(query))
def list_alive_tags(repository_id):
""" Returns a list of all the tags alive in the specified repository.
Tag's returned are joined with their manifest. Tag's returned are joined with their manifest.
""" """
query = (Tag query = (Tag
@ -53,16 +71,6 @@ def list_alive_tags(repository_id, start_pagination_id=None, limit=None, sort_ta
.join(Manifest) .join(Manifest)
.where(Tag.repository == repository_id)) .where(Tag.repository == repository_id))
if start_pagination_id is not None:
assert sort_tags
query = query.where(Tag.id >= start_pagination_id)
if limit is not None:
query = query.limit(limit)
if sort_tags:
query = query.order_by(Tag.id)
return filter_to_visible_tags(filter_to_alive_tags(query)) return filter_to_visible_tags(filter_to_alive_tags(query))

View file

@ -11,7 +11,7 @@ from data.model.oci.tag import (find_matching_tag, get_most_recent_tag, list_ali
get_expired_tag, get_tag, delete_tag, get_expired_tag, get_tag, delete_tag,
delete_tags_for_manifest, change_tag_expiration, delete_tags_for_manifest, change_tag_expiration,
set_tag_expiration_for_manifest, retarget_tag, set_tag_expiration_for_manifest, retarget_tag,
create_temporary_tag) create_temporary_tag, lookup_alive_tags_shallow)
from data.model.repository import get_repository, create_repository from data.model.repository import get_repository, create_repository
from test.fixtures import * from test.fixtures import *
@ -74,6 +74,24 @@ def test_list_alive_tags(initialized_db):
assert tag not in tags assert tag not in tags
def test_lookup_alive_tags_shallow(initialized_db):
found = False
for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())):
tags = lookup_alive_tags_shallow(tag.repository)
found = True
assert tag in tags
assert found
# Ensure hidden tags cannot be listed.
tag = Tag.get()
tag.hidden = True
tag.save()
tags = lookup_alive_tags_shallow(tag.repository)
assert tag not in tags
def test_get_tag(initialized_db): def test_get_tag(initialized_db):
found = False found = False
for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())): for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())):

View file

@ -99,6 +99,28 @@ class Label(datatype('Label', ['key', 'value', 'uuid', 'source_type_name', 'medi
source_type_name=label.source_type.name) source_type_name=label.source_type.name)
class ShallowTag(datatype('ShallowTag', ['name'])):
""" ShallowTag represents a tag in a repository, but only contains basic information. """
@classmethod
def for_tag(cls, tag):
if tag is None:
return None
return ShallowTag(db_id=tag.id, name=tag.name)
@classmethod
def for_repository_tag(cls, repository_tag):
if repository_tag is None:
return None
return ShallowTag(db_id=repository_tag.id, name=repository_tag.name)
@property
def id(self):
""" The ID of this tag for pagination purposes only. """
return self._db_id
class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_start_ts', class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_start_ts',
'lifetime_end_ts', 'lifetime_start_ms', 'lifetime_end_ms'])): 'lifetime_end_ts', 'lifetime_start_ms', 'lifetime_end_ms'])):
""" Tag represents a tag in a repository, which points to a manifest or image. """ """ Tag represents a tag in a repository, which points to a manifest or image. """

View file

@ -112,14 +112,18 @@ class RegistryDataInterface(object):
""" """
@abstractmethod @abstractmethod
def list_repository_tags(self, repository_ref, include_legacy_images=False, def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit):
start_pagination_id=None,
limit=None,
sort_tags=False):
""" """
Returns a list of all the active tags in the repository. Note that this can be a *heavy* Returns a page of actvie tags in a repository. Note that the tags returned by this method
operation on repositories with a lot of tags, and should be avoided for more targetted are ShallowTag objects, which only contain the tag name.
operations wherever possible. """
@abstractmethod
def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False):
"""
Returns a list of all the active tags in the repository. Note that this is a *HEAVY*
operation on repositories with a lot of tags, and should only be used for testing or
where other more specific operations are not possible.
""" """
@abstractmethod @abstractmethod

View file

@ -10,7 +10,7 @@ from data.model.oci.retriever import RepositoryContentRetriever
from data.database import db_transaction, Image from data.database import db_transaction, Image
from data.registry_model.interface import RegistryDataInterface from data.registry_model.interface import RegistryDataInterface
from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus, from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus,
Blob) Blob, ShallowTag)
from data.registry_model.shared import SharedModel from data.registry_model.shared import SharedModel
from data.registry_model.label_handlers import apply_label_to_manifest from data.registry_model.label_handlers import apply_label_to_manifest
from image.docker import ManifestException from image.docker import ManifestException
@ -199,20 +199,21 @@ class OCIModel(SharedModel, RegistryDataInterface):
""" """
return Label.for_label(oci.label.delete_manifest_label(label_uuid, manifest._db_id)) return Label.for_label(oci.label.delete_manifest_label(label_uuid, manifest._db_id))
def list_repository_tags(self, repository_ref, include_legacy_images=False, def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit):
start_pagination_id=None,
limit=None,
sort_tags=False):
""" """
Returns a list of all the active tags in the repository. Note that this can be a *heavy* Returns a page of actvie tags in a repository. Note that the tags returned by this method
operation on repositories with a lot of tags, and should be avoided for more targetted are ShallowTag objects, which only contain the tag name.
operations wherever possible.
""" """
if start_pagination_id: tags = oci.tag.lookup_alive_tags_shallow(repository_ref._db_id, start_pagination_id, limit)
assert sort_tags return [ShallowTag.for_tag(tag) for tag in tags]
tags = list(oci.tag.list_alive_tags(repository_ref._db_id, start_pagination_id, limit, def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False):
sort_tags=sort_tags)) """
Returns a list of all the active tags in the repository. Note that this is a *HEAVY*
operation on repositories with a lot of tags, and should only be used for testing or
where other more specific operations are not possible.
"""
tags = list(oci.tag.list_alive_tags(repository_ref._db_id))
legacy_images_map = {} legacy_images_map = {}
if include_legacy_images: if include_legacy_images:
legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) legacy_images_map = oci.tag.get_legacy_images_for_tags(tags)

View file

@ -12,7 +12,7 @@ from data.database import db_transaction
from data.registry_model.interface import RegistryDataInterface from data.registry_model.interface import RegistryDataInterface
from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label,
SecurityScanStatus, ManifestLayer, Blob, DerivedImage, SecurityScanStatus, ManifestLayer, Blob, DerivedImage,
RepositoryReference) RepositoryReference, ShallowTag)
from data.registry_model.shared import SharedModel from data.registry_model.shared import SharedModel
from data.registry_model.label_handlers import apply_label_to_manifest from data.registry_model.label_handlers import apply_label_to_manifest
from image.docker.schema1 import (DockerSchema1ManifestBuilder, ManifestException, from image.docker.schema1 import (DockerSchema1ManifestBuilder, ManifestException,
@ -47,7 +47,7 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
""" Returns a map from tag name to its legacy image, for all tags with legacy images in """ Returns a map from tag name to its legacy image, for all tags with legacy images in
the repository. the repository.
""" """
tags = self.list_repository_tags(repository_ref, include_legacy_images=True) tags = self.list_all_active_repository_tags(repository_ref, include_legacy_images=True)
return {tag.name: tag.legacy_image.docker_image_id for tag in tags} return {tag.name: tag.legacy_image.docker_image_id for tag in tags}
def find_matching_tag(self, repository_ref, tag_names): def find_matching_tag(self, repository_ref, tag_names):
@ -263,21 +263,26 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
""" """
return Label.for_label(model.label.delete_manifest_label(label_uuid, manifest._db_id)) return Label.for_label(model.label.delete_manifest_label(label_uuid, manifest._db_id))
def list_repository_tags(self, repository_ref, include_legacy_images=False, def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit):
start_pagination_id=None,
limit=None,
sort_tags=False):
""" """
Returns a list of all the active tags in the repository. Note that this can be a *heavy* Returns a page of actvie tags in a repository. Note that the tags returned by this method
operation on repositories with a lot of tags, and should be avoided for more targetted are ShallowTag objects, which only contain the tag name.
operations wherever possible. """
tags = model.tag.list_active_repo_tags(repository_ref._db_id, include_images=False,
start_id=start_pagination_id, limit=limit)
return [ShallowTag.for_repository_tag(tag) for tag in tags]
def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False):
"""
Returns a list of all the active tags in the repository. Note that this is a *HEAVY*
operation on repositories with a lot of tags, and should only be used for testing or
where other more specific operations are not possible.
""" """
if not include_legacy_images: if not include_legacy_images:
tags = model.tag.list_active_repo_tags(repository_ref._db_id, start_pagination_id, limit, tags = model.tag.list_active_repo_tags(repository_ref._db_id, include_images=False)
include_images=False)
return [Tag.for_repository_tag(tag) for tag in tags] return [Tag.for_repository_tag(tag) for tag in tags]
tags = model.tag.list_active_repo_tags(repository_ref._db_id, start_pagination_id, limit) tags = model.tag.list_active_repo_tags(repository_ref._db_id)
return [Tag.for_repository_tag(tag, return [Tag.for_repository_tag(tag,
legacy_image=LegacyImage.for_image(tag.image), legacy_image=LegacyImage.for_image(tag.image),
manifest_digest=(tag.tagmanifest.digest manifest_digest=(tag.tagmanifest.digest

View file

@ -237,7 +237,7 @@ def test_batch_labels(registry_model):
]) ])
def test_repository_tags(repo_namespace, repo_name, registry_model): def test_repository_tags(repo_namespace, repo_name, registry_model):
repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) repository_ref = registry_model.lookup_repository(repo_namespace, repo_name)
tags = registry_model.list_repository_tags(repository_ref, include_legacy_images=True) tags = registry_model.list_all_active_repository_tags(repository_ref, include_legacy_images=True)
assert len(tags) assert len(tags)
tags_map = registry_model.get_legacy_tags_map(repository_ref, storage) tags_map = registry_model.get_legacy_tags_map(repository_ref, storage)
@ -295,7 +295,7 @@ def test_repository_tag_history(namespace, name, expected_tag_count, has_expired
]) ])
def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model): def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model):
repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) repository_ref = registry_model.lookup_repository(repo_namespace, repo_name)
tags = registry_model.list_repository_tags(repository_ref) tags = registry_model.list_all_active_repository_tags(repository_ref)
assert len(tags) assert len(tags)
# Save history before the deletions. # Save history before the deletions.
@ -318,7 +318,7 @@ def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model):
assert found_tag is None assert found_tag is None
# Ensure all tags have been deleted. # Ensure all tags have been deleted.
tags = registry_model.list_repository_tags(repository_ref) tags = registry_model.list_all_active_repository_tags(repository_ref)
assert not len(tags) assert not len(tags)
# Ensure that the tags all live in history. # Ensure that the tags all live in history.
@ -406,7 +406,7 @@ def test_change_repository_tag_expiration(registry_model):
def test_get_legacy_images_owned_by_tag(repo_namespace, repo_name, expected_non_empty, def test_get_legacy_images_owned_by_tag(repo_namespace, repo_name, expected_non_empty,
registry_model): registry_model):
repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) repository_ref = registry_model.lookup_repository(repo_namespace, repo_name)
tags = registry_model.list_repository_tags(repository_ref) tags = registry_model.list_all_active_repository_tags(repository_ref)
assert len(tags) assert len(tags)
non_empty = set() non_empty = set()
@ -419,7 +419,7 @@ def test_get_legacy_images_owned_by_tag(repo_namespace, repo_name, expected_non_
def test_get_security_status(registry_model): def test_get_security_status(registry_model):
repository_ref = registry_model.lookup_repository('devtable', 'simple') repository_ref = registry_model.lookup_repository('devtable', 'simple')
tags = registry_model.list_repository_tags(repository_ref, include_legacy_images=True) tags = registry_model.list_all_active_repository_tags(repository_ref, include_legacy_images=True)
assert len(tags) assert len(tags)
for tag in tags: for tag in tags:
@ -481,7 +481,7 @@ def test_backfill_manifest_for_tag(repo_namespace, repo_name, clear_rows, pre_oc
]) ])
def test_backfill_manifest_on_lookup(repo_namespace, repo_name, clear_rows, pre_oci_model): def test_backfill_manifest_on_lookup(repo_namespace, repo_name, clear_rows, pre_oci_model):
repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name)
tags = pre_oci_model.list_repository_tags(repository_ref) tags = pre_oci_model.list_all_active_repository_tags(repository_ref)
assert tags assert tags
for tag in tags: for tag in tags:
@ -513,7 +513,7 @@ def test_is_namespace_enabled(namespace, expect_enabled, registry_model):
]) ])
def test_layers_and_blobs(repo_namespace, repo_name, registry_model): def test_layers_and_blobs(repo_namespace, repo_name, registry_model):
repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) repository_ref = registry_model.lookup_repository(repo_namespace, repo_name)
tags = registry_model.list_repository_tags(repository_ref) tags = registry_model.list_all_active_repository_tags(repository_ref)
assert tags assert tags
for tag in tags: for tag in tags:
@ -938,7 +938,7 @@ def test_unicode_emoji(registry_model):
assert found.get_parsed_manifest().digest == manifest.digest assert found.get_parsed_manifest().digest == manifest.digest
def test_list_repository_tags(oci_model): def test_lookup_active_repository_tags(oci_model):
repository_ref = oci_model.lookup_repository('devtable', 'simple') repository_ref = oci_model.lookup_repository('devtable', 'simple')
latest_tag = oci_model.get_repo_tag(repository_ref, 'latest') latest_tag = oci_model.get_repo_tag(repository_ref, 'latest')
manifest = oci_model.get_manifest_for_tag(latest_tag) manifest = oci_model.get_manifest_for_tag(latest_tag)
@ -951,16 +951,18 @@ def test_list_repository_tags(oci_model):
tags_expected.add('somenewtag%s' % index) tags_expected.add('somenewtag%s' % index)
oci_model.retarget_tag(repository_ref, 'somenewtag%s' % index, manifest, storage) oci_model.retarget_tag(repository_ref, 'somenewtag%s' % index, manifest, storage)
assert tags_expected
# List the tags. # List the tags.
tags_found = set() tags_found = set()
tag_id = None tag_id = None
while True: while True:
tags = oci_model.list_repository_tags(repository_ref, start_pagination_id=tag_id, tags = oci_model.lookup_active_repository_tags(repository_ref, tag_id, 11)
limit=11, sort_tags=True)
assert len(tags) <= 11 assert len(tags) <= 11
for tag in tags[0:10]: for tag in tags[0:10]:
assert tag.name not in tags_found assert tag.name not in tags_found
if tag.name in tags_expected: if tag.name in tags_expected:
tags_found.add(tag.name)
tags_expected.remove(tag.name) tags_expected.remove(tag.name)
if len(tags) < 11: if len(tags) < 11:
@ -969,4 +971,5 @@ def test_list_repository_tags(oci_model):
tag_id = tags[10].id tag_id = tags[10].id
# Make sure we've found all the tags. # Make sure we've found all the tags.
assert tags_found
assert not tags_expected assert not tags_expected

View file

@ -8,7 +8,7 @@ from test.fixtures import *
def test_repository_manifest(client): def test_repository_manifest(client):
with client_with_identity('devtable', client) as cl: with client_with_identity('devtable', client) as cl:
repo_ref = registry_model.lookup_repository('devtable', 'simple') repo_ref = registry_model.lookup_repository('devtable', 'simple')
tags = registry_model.list_repository_tags(repo_ref) tags = registry_model.list_all_active_repository_tags(repo_ref)
for tag in tags: for tag in tags:
manifest_digest = tag.manifest_digest manifest_digest = tag.manifest_digest
if manifest_digest is None: if manifest_digest is None:

View file

@ -20,8 +20,7 @@ def list_all_tags(namespace_name, repo_name, start_id, limit, pagination_callbac
# NOTE: We add 1 to the limit because that's how pagination_callback knows if there are # NOTE: We add 1 to the limit because that's how pagination_callback knows if there are
# additional tags. # additional tags.
tags = registry_model.list_repository_tags(repository_ref, start_pagination_id=start_id, tags = registry_model.lookup_active_repository_tags(repository_ref, start_id, limit + 1)
limit=limit + 1, sort_tags=True)
response = jsonify({ response = jsonify({
'name': '{0}/{1}'.format(namespace_name, repo_name), 'name': '{0}/{1}'.format(namespace_name, repo_name),
'tags': [tag.name for tag in tags][0:limit], 'tags': [tag.name for tag in tags][0:limit],

View file

@ -2147,7 +2147,7 @@ class TestDeleteRepository(ApiTestCase):
# Make sure the repository has some images and tags. # Make sure the repository has some images and tags.
repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, 'complex') repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, 'complex')
self.assertTrue(len(list(registry_model.get_legacy_images(repo_ref))) > 0) self.assertTrue(len(list(registry_model.get_legacy_images(repo_ref))) > 0)
self.assertTrue(len(list(registry_model.list_repository_tags(repo_ref))) > 0) self.assertTrue(len(list(registry_model.list_all_active_repository_tags(repo_ref))) > 0)
# Add some data for the repository, in addition to is already existing images and tags. # Add some data for the repository, in addition to is already existing images and tags.
repository = model.repository.get_repository(ADMIN_ACCESS_USER, 'complex') repository = model.repository.get_repository(ADMIN_ACCESS_USER, 'complex')