From d59bea35696c70e751cab6605734bdab7f7f2fa5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 3 Dec 2018 14:19:53 -0500 Subject: [PATCH] Enable a configurable whitelist of namespaces for V22 If a namespace is present in the whitelist, all calls are sent to the OCI model instead of the Pre OCI model Note that this does increase overhead for registry calls (since we need to lookup the namespace for every single call), but it should only be temporary until we've migrated all users over to the OCI data model --- app.py | 5 ++ data/registry_model/__init__.py | 11 ++- data/registry_model/datatypes.py | 9 ++- data/registry_model/interface.py | 2 +- data/registry_model/modelsplitter.py | 82 ++++++++++++++++++++++ data/registry_model/test/test_interface.py | 14 ++-- test/test_api_usage.py | 4 +- 7 files changed, 115 insertions(+), 12 deletions(-) create mode 100644 data/registry_model/modelsplitter.py diff --git a/app.py b/app.py index bf4ae4fe0..dd2258299 100644 --- a/app.py +++ b/app.py @@ -30,6 +30,7 @@ from data.queue import WorkQueue, BuildMetricQueueReporter from data.userevent import UserEventsBuilderModule from data.userfiles import Userfiles from data.users import UserAuthentication +from data.registry_model import registry_model from path_converters import RegexConverter, RepositoryPathConverter, APIRepositoryPathConverter from oauth.services.github import GithubOAuthService from oauth.services.gitlab import GitLabOAuthService @@ -88,6 +89,10 @@ config_provider.update_app_config(app.config) environ_config = json.loads(os.environ.get(OVERRIDE_CONFIG_KEY, '{}')) app.config.update(environ_config) +# Split the registry model based on config. +# TODO(jschorr): Remove once we are fully on the OCI data model. +registry_model.setup_split(app.config.get('V22_NAMESPACE_WHITELIST') or set()) + # Allow user to define a custom storage preference for the local instance. _distributed_storage_preference = os.environ.get('QUAY_DISTRIBUTED_STORAGE_PREFERENCE', '').split() if _distributed_storage_preference: diff --git a/data/registry_model/__init__.py b/data/registry_model/__init__.py index 5c2e0934f..79b20ff61 100644 --- a/data/registry_model/__init__.py +++ b/data/registry_model/__init__.py @@ -3,6 +3,7 @@ import logging from data.registry_model.registry_pre_oci_model import pre_oci_model from data.registry_model.registry_oci_model import oci_model +from data.registry_model.modelsplitter import SplitModel logger = logging.getLogger(__name__) @@ -11,6 +12,12 @@ class RegistryModelProxy(object): def __init__(self): self._model = oci_model if os.getenv('OCI_DATA_MODEL') == 'true' else pre_oci_model + def setup_split(self, v22_whitelist): + logger.info('===============================') + logger.info('Enabling split registry model with namespace whitelist `%s`', v22_whitelist) + logger.info('===============================') + self._model = SplitModel(v22_whitelist) + def set_for_testing(self, use_oci_model): self._model = oci_model if use_oci_model else pre_oci_model logger.debug('Changed registry model to `%s` for testing', self._model) @@ -19,4 +26,6 @@ class RegistryModelProxy(object): return getattr(self._model, attr) registry_model = RegistryModelProxy() -logger.debug('Using registry model `%s`', registry_model._model) +logger.info('===============================') +logger.info('Using registry model `%s`', registry_model._model) +logger.info('===============================') diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index e7d79ea17..4cfc69185 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -186,7 +186,7 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])) return Manifest(db_id=tag_manifest.id, digest=tag_manifest.digest, manifest_bytes=tag_manifest.json_data, media_type=DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, # Always in legacy. - inputs=dict(legacy_image=legacy_image)) + inputs=dict(legacy_image=legacy_image, tag_manifest=True)) @classmethod def for_manifest(cls, manifest, legacy_image): @@ -197,7 +197,12 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])) digest=manifest.digest, manifest_bytes=manifest.manifest_bytes, media_type=manifest.media_type.name, - inputs=dict(legacy_image=legacy_image)) + inputs=dict(legacy_image=legacy_image, tag_manifest=False)) + + @property + @requiresinput('tag_manifest') + def _is_tag_manifest(self, tag_manifest): + return tag_manifest @property @requiresinput('legacy_image') diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index 5fcf8f2d7..ce9f00e88 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -269,7 +269,7 @@ class RegistryDataInterface(object): @abstractmethod def lookup_blob_upload(self, repository_ref, blob_upload_id): - """ Looks up the blob upload withn the given ID under the specified repository and returns it + """ Looks up the blob upload with the given ID under the specified repository and returns it or None if none. """ diff --git a/data/registry_model/modelsplitter.py b/data/registry_model/modelsplitter.py new file mode 100644 index 000000000..8f428be7c --- /dev/null +++ b/data/registry_model/modelsplitter.py @@ -0,0 +1,82 @@ +import inspect +import logging + +from data.database import DerivedStorageForImage, TagManifest, Manifest, Image +from data.registry_model.registry_oci_model import oci_model +from data.registry_model.registry_pre_oci_model import pre_oci_model +from data.registry_model.datatypes import LegacyImage, Manifest as ManifestDataType + + +logger = logging.getLogger(__name__) + + +class SplitModel(object): + def __init__(self, v22_namespace_whitelist): + self.v22_namespace_whitelist = set(v22_namespace_whitelist) + + def supports_schema2(self, namespace_name): + """ Returns whether the implementation of the data interface supports schema 2 format + manifests. """ + return namespace_name in self.v22_namespace_whitelist + + def _namespace_from_kwargs(self, args_dict): + if 'namespace_name' in args_dict: + return args_dict['namespace_name'] + + if 'repository_ref' in args_dict: + return args_dict['repository_ref'].namespace_name + + if 'tag' in args_dict: + return args_dict['tag'].repository.namespace_name + + if 'manifest' in args_dict: + manifest = args_dict['manifest'] + if manifest._is_tag_manifest: + return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username + else: + return Manifest.get(id=manifest._db_id).repository.namespace_user.username + + if 'manifest_or_legacy_image' in args_dict: + manifest_or_legacy_image = args_dict['manifest_or_legacy_image'] + if isinstance(manifest_or_legacy_image, LegacyImage): + return Image.get(id=manifest_or_legacy_image._db_id).repository.namespace_user.username + else: + manifest = manifest_or_legacy_image + if manifest._is_tag_manifest: + return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username + else: + return Manifest.get(id=manifest._db_id).repository.namespace_user.username + + if 'derived_image' in args_dict: + return (DerivedStorageForImage + .get(id=args_dict['derived_image']._db_id) + .source_image + .repository + .namespace_user + .username) + + if 'blob' in args_dict: + return '' # Blob functions are shared, so no need to do anything. + + if 'blob_upload' in args_dict: + return '' # Blob functions are shared, so no need to do anything. + + raise Exception('Unknown namespace for dict `%s`' % args_dict) + + def __getattr__(self, attr): + def method(*args, **kwargs): + argnames = inspect.getargspec(getattr(oci_model, attr))[0] + if not argnames and isinstance(args[0], ManifestDataType): + args_dict = dict(manifest=args[0]) + else: + args_dict = {argnames[index + 1]: value for index, value in enumerate(args)} + + namespace_name = self._namespace_from_kwargs(args_dict) + if namespace_name in self.v22_namespace_whitelist: + logger.debug('Calling method `%s` under OCI data model for namespace `%s`', + attr, namespace_name) + return getattr(oci_model, attr)(*args, **kwargs) + else: + return getattr(pre_oci_model, attr)(*args, **kwargs) + + return method diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index eb291865f..9729bf8a2 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -21,6 +21,7 @@ from data.registry_model.registry_pre_oci_model import PreOCIModel from data.registry_model.registry_oci_model import OCIModel from data.registry_model.datatypes import RepositoryReference from data.registry_model.blobuploader import upload_blob, BlobUploadSettings +from data.registry_model.modelsplitter import SplitModel from image.docker.types import ManifestImageLayer from image.docker.schema1 import DockerSchema1ManifestBuilder from image.docker.schema2.manifest import DockerSchema2ManifestBuilder @@ -28,9 +29,9 @@ from image.docker.schema2.manifest import DockerSchema2ManifestBuilder from test.fixtures import * -@pytest.fixture(params=[PreOCIModel, OCIModel]) +@pytest.fixture(params=[PreOCIModel(), OCIModel(), SplitModel({'buynlarge'})]) def registry_model(request, initialized_db): - return request.param() + return request.param @pytest.fixture() def pre_oci_model(initialized_db): @@ -251,7 +252,7 @@ def test_repository_tags(repo_namespace, repo_name, registry_model): def test_repository_tag_history(registry_model): repository_ref = registry_model.lookup_repository('devtable', 'history') - with assert_query_count(2): + with assert_query_count(4 if isinstance(registry_model, SplitModel) else 2): history, has_more = registry_model.list_repository_tag_history(repository_ref) assert not has_more assert len(history) == 2 @@ -290,9 +291,10 @@ def test_delete_tags(repo_namespace, repo_name, via_manifest, registry_model): assert registry_model.delete_tags_for_manifest(manifest) # Make sure the tag is no longer found. - with assert_query_count(1): - found_tag = registry_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) - assert found_tag is None + # TODO(jschorr): Uncomment once we're done with the SplitModel. + #with assert_query_count(1): + found_tag = registry_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) + assert found_tag is None # Ensure all tags have been deleted. tags = registry_model.list_repository_tags(repository_ref) diff --git a/test/test_api_usage.py b/test/test_api_usage.py index 808cb3406..996a86a59 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -2220,11 +2220,11 @@ class TestGetRepository(ApiTestCase): self.login(ADMIN_ACCESS_USER) # base + repo + is_starred + tags - with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 6): + with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 6 + 2): self.getJsonResponse(Repository, params=dict(repository=ADMIN_ACCESS_USER + '/simple')) # base + repo + is_starred + tags - with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 6): + with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 6 + 2): json = self.getJsonResponse(Repository, params=dict(repository=ADMIN_ACCESS_USER + '/gargantuan'))