From 0d6343871e04601faa57c3199edb72f69b12fe5c Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 8 Mar 2019 14:07:30 -0500 Subject: [PATCH 1/2] Change the tags query used for OCI to list tags to be shallow This removes the join on Manifest which was (possibly) causing the prod issue. We also simplify the lookup for pre-OCI as well, a bit. --- data/model/oci/tag.py | 32 ++++++++++++------- data/model/oci/test/test_oci_tag.py | 20 +++++++++++- data/registry_model/datatypes.py | 22 +++++++++++++ data/registry_model/interface.py | 18 +++++++---- data/registry_model/registry_oci_model.py | 25 ++++++++------- data/registry_model/registry_pre_oci_model.py | 29 ++++++++++------- data/registry_model/test/test_interface.py | 23 +++++++------ endpoints/api/test/test_manifest.py | 2 +- endpoints/v2/tag.py | 3 +- test/test_api_usage.py | 2 +- 10 files changed, 118 insertions(+), 58 deletions(-) diff --git a/data/model/oci/tag.py b/data/model/oci/tag.py index de2ced020..b90efc1f4 100644 --- a/data/model/oci/tag.py +++ b/data/model/oci/tag.py @@ -44,8 +44,26 @@ def get_tag(repository_id, tag_name): return None -def list_alive_tags(repository_id, start_pagination_id=None, limit=None, sort_tags=False): - """ Returns a list of all the tags alive in the specified repository, with optional limits. +def lookup_alive_tags_shallow(repository_id, start_pagination_id=None, limit=None): + """ Returns a list of the tags alive in the specified repository. Note that the tags returned + *only* contain their ID and name. Also note that the Tags are returned ordered by ID. + """ + query = (Tag + .select(Tag.id, Tag.name) + .where(Tag.repository == repository_id) + .order_by(Tag.id)) + + if start_pagination_id is not None: + query = query.where(Tag.id >= start_pagination_id) + + if limit is not None: + query = query.limit(limit) + + return filter_to_visible_tags(filter_to_alive_tags(query)) + + +def list_alive_tags(repository_id): + """ Returns a list of all the tags alive in the specified repository. Tag's returned are joined with their manifest. """ query = (Tag @@ -53,16 +71,6 @@ def list_alive_tags(repository_id, start_pagination_id=None, limit=None, sort_ta .join(Manifest) .where(Tag.repository == repository_id)) - if start_pagination_id is not None: - assert sort_tags - query = query.where(Tag.id >= start_pagination_id) - - if limit is not None: - query = query.limit(limit) - - if sort_tags: - query = query.order_by(Tag.id) - return filter_to_visible_tags(filter_to_alive_tags(query)) diff --git a/data/model/oci/test/test_oci_tag.py b/data/model/oci/test/test_oci_tag.py index d31256abb..5e1b6a2bb 100644 --- a/data/model/oci/test/test_oci_tag.py +++ b/data/model/oci/test/test_oci_tag.py @@ -11,7 +11,7 @@ from data.model.oci.tag import (find_matching_tag, get_most_recent_tag, list_ali get_expired_tag, get_tag, delete_tag, delete_tags_for_manifest, change_tag_expiration, set_tag_expiration_for_manifest, retarget_tag, - create_temporary_tag) + create_temporary_tag, lookup_alive_tags_shallow) from data.model.repository import get_repository, create_repository from test.fixtures import * @@ -74,6 +74,24 @@ def test_list_alive_tags(initialized_db): assert tag not in tags +def test_lookup_alive_tags_shallow(initialized_db): + found = False + for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())): + tags = lookup_alive_tags_shallow(tag.repository) + found = True + assert tag in tags + + assert found + + # Ensure hidden tags cannot be listed. + tag = Tag.get() + tag.hidden = True + tag.save() + + tags = lookup_alive_tags_shallow(tag.repository) + assert tag not in tags + + def test_get_tag(initialized_db): found = False for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())): diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index 60002c7bf..f3bbb543b 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -99,6 +99,28 @@ class Label(datatype('Label', ['key', 'value', 'uuid', 'source_type_name', 'medi source_type_name=label.source_type.name) +class ShallowTag(datatype('ShallowTag', ['name'])): + """ ShallowTag represents a tag in a repository, but only contains basic information. """ + @classmethod + def for_tag(cls, tag): + if tag is None: + return None + + return ShallowTag(db_id=tag.id, name=tag.name) + + @classmethod + def for_repository_tag(cls, repository_tag): + if repository_tag is None: + return None + + return ShallowTag(db_id=repository_tag.id, name=repository_tag.name) + + @property + def id(self): + """ The ID of this tag for pagination purposes only. """ + return self._db_id + + class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_start_ts', 'lifetime_end_ts', 'lifetime_start_ms', 'lifetime_end_ms'])): """ Tag represents a tag in a repository, which points to a manifest or image. """ diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index d346201a8..c94df96a6 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -112,14 +112,18 @@ class RegistryDataInterface(object): """ @abstractmethod - def list_repository_tags(self, repository_ref, include_legacy_images=False, - start_pagination_id=None, - limit=None, - sort_tags=False): + def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit): """ - Returns a list of all the active tags in the repository. Note that this can be a *heavy* - operation on repositories with a lot of tags, and should be avoided for more targetted - operations wherever possible. + Returns a page of actvie tags in a repository. Note that the tags returned by this method + are ShallowTag objects, which only contain the tag name. + """ + + @abstractmethod + def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False): + """ + Returns a list of all the active tags in the repository. Note that this is a *HEAVY* + operation on repositories with a lot of tags, and should only be used for testing or + where other more specific operations are not possible. """ @abstractmethod diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index fac95426b..b570156ee 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -10,7 +10,7 @@ from data.model.oci.retriever import RepositoryContentRetriever from data.database import db_transaction, Image from data.registry_model.interface import RegistryDataInterface from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus, - Blob) + Blob, ShallowTag) from data.registry_model.shared import SharedModel from data.registry_model.label_handlers import apply_label_to_manifest from image.docker import ManifestException @@ -199,20 +199,21 @@ class OCIModel(SharedModel, RegistryDataInterface): """ return Label.for_label(oci.label.delete_manifest_label(label_uuid, manifest._db_id)) - def list_repository_tags(self, repository_ref, include_legacy_images=False, - start_pagination_id=None, - limit=None, - sort_tags=False): + def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit): """ - Returns a list of all the active tags in the repository. Note that this can be a *heavy* - operation on repositories with a lot of tags, and should be avoided for more targetted - operations wherever possible. + Returns a page of actvie tags in a repository. Note that the tags returned by this method + are ShallowTag objects, which only contain the tag name. """ - if start_pagination_id: - assert sort_tags + tags = oci.tag.lookup_alive_tags_shallow(repository_ref._db_id, start_pagination_id, limit) + return [ShallowTag.for_tag(tag) for tag in tags] - tags = list(oci.tag.list_alive_tags(repository_ref._db_id, start_pagination_id, limit, - sort_tags=sort_tags)) + def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False): + """ + Returns a list of all the active tags in the repository. Note that this is a *HEAVY* + operation on repositories with a lot of tags, and should only be used for testing or + where other more specific operations are not possible. + """ + tags = list(oci.tag.list_alive_tags(repository_ref._db_id)) legacy_images_map = {} if include_legacy_images: legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index cb1718790..e22c905ce 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -12,7 +12,7 @@ from data.database import db_transaction from data.registry_model.interface import RegistryDataInterface from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus, ManifestLayer, Blob, DerivedImage, - RepositoryReference) + RepositoryReference, ShallowTag) from data.registry_model.shared import SharedModel from data.registry_model.label_handlers import apply_label_to_manifest from image.docker.schema1 import (DockerSchema1ManifestBuilder, ManifestException, @@ -47,7 +47,7 @@ class PreOCIModel(SharedModel, RegistryDataInterface): """ Returns a map from tag name to its legacy image, for all tags with legacy images in the repository. """ - tags = self.list_repository_tags(repository_ref, include_legacy_images=True) + tags = self.list_all_active_repository_tags(repository_ref, include_legacy_images=True) return {tag.name: tag.legacy_image.docker_image_id for tag in tags} def find_matching_tag(self, repository_ref, tag_names): @@ -263,21 +263,26 @@ class PreOCIModel(SharedModel, RegistryDataInterface): """ return Label.for_label(model.label.delete_manifest_label(label_uuid, manifest._db_id)) - def list_repository_tags(self, repository_ref, include_legacy_images=False, - start_pagination_id=None, - limit=None, - sort_tags=False): + def lookup_active_repository_tags(self, repository_ref, start_pagination_id, limit): """ - Returns a list of all the active tags in the repository. Note that this can be a *heavy* - operation on repositories with a lot of tags, and should be avoided for more targetted - operations wherever possible. + Returns a page of actvie tags in a repository. Note that the tags returned by this method + are ShallowTag objects, which only contain the tag name. + """ + tags = model.tag.list_active_repo_tags(repository_ref._db_id, include_images=False, + start_id=start_pagination_id, limit=limit) + return [ShallowTag.for_repository_tag(tag) for tag in tags] + + def list_all_active_repository_tags(self, repository_ref, include_legacy_images=False): + """ + Returns a list of all the active tags in the repository. Note that this is a *HEAVY* + operation on repositories with a lot of tags, and should only be used for testing or + where other more specific operations are not possible. """ if not include_legacy_images: - tags = model.tag.list_active_repo_tags(repository_ref._db_id, start_pagination_id, limit, - include_images=False) + tags = model.tag.list_active_repo_tags(repository_ref._db_id, include_images=False) return [Tag.for_repository_tag(tag) for tag in tags] - tags = model.tag.list_active_repo_tags(repository_ref._db_id, start_pagination_id, limit) + tags = model.tag.list_active_repo_tags(repository_ref._db_id) return [Tag.for_repository_tag(tag, legacy_image=LegacyImage.for_image(tag.image), manifest_digest=(tag.tagmanifest.digest diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index 1c7e62e0b..353321a7e 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -237,7 +237,7 @@ def test_batch_labels(registry_model): ]) def test_repository_tags(repo_namespace, repo_name, registry_model): repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_repository_tags(repository_ref, include_legacy_images=True) + tags = registry_model.list_all_active_repository_tags(repository_ref, include_legacy_images=True) assert len(tags) tags_map = registry_model.get_legacy_tags_map(repository_ref, storage) @@ -295,7 +295,7 @@ def test_repository_tag_history(namespace, name, expected_tag_count, has_expired ]) def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model): repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_repository_tags(repository_ref) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert len(tags) # Save history before the deletions. @@ -318,7 +318,7 @@ def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model): assert found_tag is None # Ensure all tags have been deleted. - tags = registry_model.list_repository_tags(repository_ref) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert not len(tags) # Ensure that the tags all live in history. @@ -406,7 +406,7 @@ def test_change_repository_tag_expiration(registry_model): def test_get_legacy_images_owned_by_tag(repo_namespace, repo_name, expected_non_empty, registry_model): repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_repository_tags(repository_ref) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert len(tags) non_empty = set() @@ -419,7 +419,7 @@ def test_get_legacy_images_owned_by_tag(repo_namespace, repo_name, expected_non_ def test_get_security_status(registry_model): repository_ref = registry_model.lookup_repository('devtable', 'simple') - tags = registry_model.list_repository_tags(repository_ref, include_legacy_images=True) + tags = registry_model.list_all_active_repository_tags(repository_ref, include_legacy_images=True) assert len(tags) for tag in tags: @@ -481,7 +481,7 @@ def test_backfill_manifest_for_tag(repo_namespace, repo_name, clear_rows, pre_oc ]) def test_backfill_manifest_on_lookup(repo_namespace, repo_name, clear_rows, pre_oci_model): repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) - tags = pre_oci_model.list_repository_tags(repository_ref) + tags = pre_oci_model.list_all_active_repository_tags(repository_ref) assert tags for tag in tags: @@ -513,7 +513,7 @@ def test_is_namespace_enabled(namespace, expect_enabled, registry_model): ]) def test_layers_and_blobs(repo_namespace, repo_name, registry_model): repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_repository_tags(repository_ref) + tags = registry_model.list_all_active_repository_tags(repository_ref) assert tags for tag in tags: @@ -938,7 +938,7 @@ def test_unicode_emoji(registry_model): assert found.get_parsed_manifest().digest == manifest.digest -def test_list_repository_tags(oci_model): +def test_lookup_active_repository_tags(oci_model): repository_ref = oci_model.lookup_repository('devtable', 'simple') latest_tag = oci_model.get_repo_tag(repository_ref, 'latest') manifest = oci_model.get_manifest_for_tag(latest_tag) @@ -951,16 +951,18 @@ def test_list_repository_tags(oci_model): tags_expected.add('somenewtag%s' % index) oci_model.retarget_tag(repository_ref, 'somenewtag%s' % index, manifest, storage) + assert tags_expected + # List the tags. tags_found = set() tag_id = None while True: - tags = oci_model.list_repository_tags(repository_ref, start_pagination_id=tag_id, - limit=11, sort_tags=True) + tags = oci_model.lookup_active_repository_tags(repository_ref, tag_id, 11) assert len(tags) <= 11 for tag in tags[0:10]: assert tag.name not in tags_found if tag.name in tags_expected: + tags_found.add(tag.name) tags_expected.remove(tag.name) if len(tags) < 11: @@ -969,4 +971,5 @@ def test_list_repository_tags(oci_model): tag_id = tags[10].id # Make sure we've found all the tags. + assert tags_found assert not tags_expected diff --git a/endpoints/api/test/test_manifest.py b/endpoints/api/test/test_manifest.py index 7237ac020..164c26061 100644 --- a/endpoints/api/test/test_manifest.py +++ b/endpoints/api/test/test_manifest.py @@ -8,7 +8,7 @@ from test.fixtures import * def test_repository_manifest(client): with client_with_identity('devtable', client) as cl: repo_ref = registry_model.lookup_repository('devtable', 'simple') - tags = registry_model.list_repository_tags(repo_ref) + tags = registry_model.list_all_active_repository_tags(repo_ref) for tag in tags: manifest_digest = tag.manifest_digest if manifest_digest is None: diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py index 2d75d7350..352cb19dd 100644 --- a/endpoints/v2/tag.py +++ b/endpoints/v2/tag.py @@ -20,8 +20,7 @@ def list_all_tags(namespace_name, repo_name, start_id, limit, pagination_callbac # NOTE: We add 1 to the limit because that's how pagination_callback knows if there are # additional tags. - tags = registry_model.list_repository_tags(repository_ref, start_pagination_id=start_id, - limit=limit + 1, sort_tags=True) + tags = registry_model.lookup_active_repository_tags(repository_ref, start_id, limit + 1) response = jsonify({ 'name': '{0}/{1}'.format(namespace_name, repo_name), 'tags': [tag.name for tag in tags][0:limit], diff --git a/test/test_api_usage.py b/test/test_api_usage.py index db5b912c5..f56b2ddd1 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -2147,7 +2147,7 @@ class TestDeleteRepository(ApiTestCase): # Make sure the repository has some images and tags. repo_ref = registry_model.lookup_repository(ADMIN_ACCESS_USER, 'complex') self.assertTrue(len(list(registry_model.get_legacy_images(repo_ref))) > 0) - self.assertTrue(len(list(registry_model.list_repository_tags(repo_ref))) > 0) + self.assertTrue(len(list(registry_model.list_all_active_repository_tags(repo_ref))) > 0) # Add some data for the repository, in addition to is already existing images and tags. repository = model.repository.get_repository(ADMIN_ACCESS_USER, 'complex') From 2027d13d2bc1e7d116f593f94192adf41b575700 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 8 Mar 2019 14:59:33 -0500 Subject: [PATCH 2/2] Change tag history call in OCI to not load the contents of the manifest Just in case that load is slow on the DB --- data/model/oci/tag.py | 5 +++-- data/registry_model/datatypes.py | 6 +++++- endpoints/api/tag.py | 3 --- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/data/model/oci/tag.py b/data/model/oci/tag.py index b90efc1f4..0a7706c96 100644 --- a/data/model/oci/tag.py +++ b/data/model/oci/tag.py @@ -78,10 +78,11 @@ def list_repository_tag_history(repository_id, page, page_size, specific_tag_nam active_tags_only=False): """ Returns a tuple of the full set of tags found in the specified repository, including those that are no longer alive (unless active_tags_only is True), and whether additional tags exist. - If specific_tag_name is given, the tags are further filtered by name. + If specific_tag_name is given, the tags are further filtered by name. Note that the + returned Manifest will not contain the manifest contents. """ query = (Tag - .select(Tag, Manifest) + .select(Tag, Manifest.id, Manifest.digest, Manifest.media_type) .join(Manifest) .where(Tag.repository == repository_id) .order_by(Tag.lifetime_start_ms.desc(), Tag.name) diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index f3bbb543b..b0352cce2 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -217,9 +217,12 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'internal_manifest_ if manifest is None: return None + # NOTE: `manifest_bytes` will be None if not selected by certain join queries. + manifest_bytes = (Bytes.for_string_or_unicode(manifest.manifest_bytes) + if manifest.manifest_bytes is not None else None) return Manifest(db_id=manifest.id, digest=manifest.digest, - internal_manifest_bytes=Bytes.for_string_or_unicode(manifest.manifest_bytes), + internal_manifest_bytes=manifest_bytes, media_type=ManifestTable.media_type.get_name(manifest.media_type_id), inputs=dict(legacy_image=legacy_image, tag_manifest=False)) @@ -245,6 +248,7 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'internal_manifest_ def get_parsed_manifest(self, validate=True): """ Returns the parsed manifest for this manifest. """ + assert self.internal_manifest_bytes return parse_manifest_from_bytes(self.internal_manifest_bytes, self.media_type, validate=validate) diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index 280853fdf..b6ce4e0ad 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -41,9 +41,6 @@ def _tag_dict(tag): if tag.manifest: tag_info['is_manifest_list'] = tag.manifest.is_manifest_list - if 'size' not in tag_info: - tag_info['size'] = tag.manifest.layers_compressed_size - if tag.lifetime_start_ts > 0: last_modified = format_date(datetime.utcfromtimestamp(tag.lifetime_start_ts)) tag_info['last_modified'] = last_modified