From 46edebe6b0ab53ba635bef64df1b8b108c73343e Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 23 Aug 2018 16:36:04 -0400 Subject: [PATCH 1/3] Change secscan API endpoints to use new registry model interface --- data/registry_model/datatypes.py | 10 +++ data/registry_model/interface.py | 4 ++ data/registry_model/registry_pre_oci_model.py | 24 ++++++- .../registry_model/test/test_pre_oci_model.py | 9 +++ endpoints/api/secscan.py | 62 ++++++++----------- util/secscan/api.py | 9 ++- 6 files changed, 81 insertions(+), 37 deletions(-) diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index dd229f6cf..89a9ad2f2 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -1,3 +1,5 @@ +from enum import Enum, unique + from data.registry_model.datatype import datatype, requiresinput class RepositoryReference(datatype('Repository', [])): @@ -107,3 +109,11 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen return [] return [Tag.for_repository_tag(tag) for tag in tags] + + +@unique +class SecurityScanStatus(Enum): + """ Security scan status enum """ + SCANNED = 'scanned' + FAILED = 'failed' + QUEUED = 'queued' diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index bedbb02ce..323d83f85 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -118,3 +118,7 @@ class RegistryDataInterface(object): @abstractmethod def get_legacy_images_owned_by_tag(self, tag): """ Returns all legacy images *solely owned and used* by the given tag. """ + + @abstractmethod + def get_security_status(self, manifest_or_legacy_image): + """ Returns the security status for the given manifest or legacy image or None if none. """ diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index beb9c5866..a7f5eabfe 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -5,7 +5,8 @@ from collections import defaultdict from data import database from data import model from data.registry_model.interface import RegistryDataInterface -from data.registry_model.datatypes import Tag, RepositoryReference, Manifest, LegacyImage, Label +from data.registry_model.datatypes import (Tag, RepositoryReference, Manifest, LegacyImage, Label, + SecurityScanStatus) class PreOCIModel(RegistryDataInterface): @@ -267,5 +268,26 @@ class PreOCIModel(RegistryDataInterface): return [LegacyImage.for_image(image, images_map=images_map) for image in images] + def get_security_status(self, manifest_or_legacy_image): + """ Returns the security status for the given manifest or legacy image or None if none. """ + image = None + + if isinstance(manifest_or_legacy_image, Manifest): + try: + tag_manifest = database.TagManifest.get(id=manifest_or_legacy_image._db_id) + image = tag_manifest.tag.image + except database.TagManifest.DoesNotExist: + return None + else: + try: + image = database.Image.get(id=manifest_or_legacy_image._db_id) + except database.Image.DoesNotExist: + return None + + if image.security_indexed_engine is not None and image.security_indexed_engine >= 0: + return SecurityScanStatus.SCANNED if image.security_indexed else SecurityScanStatus.FAILED + + return SecurityScanStatus.QUEUED + pre_oci_model = PreOCIModel() diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py index 50c1a36f9..a5b203c66 100644 --- a/data/registry_model/test/test_pre_oci_model.py +++ b/data/registry_model/test/test_pre_oci_model.py @@ -293,3 +293,12 @@ def test_get_legacy_images_owned_by_tag(repo_namespace, repo_name, expected_non_ non_empty.add(tag.name) assert non_empty == set(expected_non_empty) + + +def test_get_security_status(pre_oci_model): + repository_ref = pre_oci_model.lookup_repository('devtable', 'simple') + tags = pre_oci_model.list_repository_tags(repository_ref, include_legacy_images=True) + assert len(tags) + + for tag in tags: + assert pre_oci_model.get_security_status(tag.legacy_image) diff --git a/endpoints/api/secscan.py b/endpoints/api/secscan.py index 3fd04a66d..a5ae88043 100644 --- a/endpoints/api/secscan.py +++ b/endpoints/api/secscan.py @@ -4,7 +4,8 @@ import logging import features from app import secscan_api -from data import model +from data.registry_model import registry_model +from data.registry_model.datatypes import SecurityScanStatus from endpoints.api import (require_repo_read, path_param, RepositoryParamResource, resource, nickname, show_if, parse_args, query_param, truthy_bool, disallow_for_app_repositories) @@ -15,37 +16,24 @@ from util.secscan.api import APIRequestFailure logger = logging.getLogger(__name__) - -class SCAN_STATUS(object): - """ Security scan status enum """ - SCANNED = 'scanned' - FAILED = 'failed' - QUEUED = 'queued' - - -def _get_status(repo_image): - """ Returns the SCAN_STATUS value for the given image. """ - if repo_image.security_indexed_engine is not None and repo_image.security_indexed_engine >= 0: - return SCAN_STATUS.SCANNED if repo_image.security_indexed else SCAN_STATUS.FAILED - - return SCAN_STATUS.QUEUED - -def _security_status_for_image(namespace, repository, repo_image, include_vulnerabilities=True): +def _security_info(manifest_or_legacy_image, include_vulnerabilities=True): """ Returns a dict representing the result of a call to the security status API for the given - image. + manifest or image. """ - if not repo_image.security_indexed: - logger.debug('Image %s under repository %s/%s not security indexed', - repo_image.docker_image_id, namespace, repository) + status = registry_model.get_security_status(manifest_or_legacy_image) + if status is None: + raise NotFound() + + if status != SecurityScanStatus.SCANNED: return { - 'status': _get_status(repo_image), + 'status': status.value, } try: if include_vulnerabilities: - data = secscan_api.get_layer_data(repo_image, include_vulnerabilities=True) + data = secscan_api.get_layer_data(manifest_or_legacy_image, include_vulnerabilities=True) else: - data = secscan_api.get_layer_data(repo_image, include_features=True) + data = secscan_api.get_layer_data(manifest_or_legacy_image, include_features=True) except APIRequestFailure as arf: raise DownstreamIssue(arf.message) @@ -53,7 +41,7 @@ def _security_status_for_image(namespace, repository, repo_image, include_vulner raise NotFound() return { - 'status': _get_status(repo_image), + 'status': status.value, 'data': data, } @@ -73,12 +61,16 @@ class RepositoryImageSecurity(RepositoryParamResource): default=False) def get(self, namespace, repository, imageid, parsed_args): """ Fetches the features and vulnerabilities (if any) for a repository image. """ - repo_image = model.image.get_repo_image(namespace, repository, imageid) - if repo_image is None: + repo_ref = registry_model.lookup_repository(namespace, repository) + if repo_ref is None: raise NotFound() - return _security_status_for_image(namespace, repository, repo_image, - parsed_args.vulnerabilities) + legacy_image = registry_model.get_legacy_image(repo_ref, imageid) + if legacy_image is None: + raise NotFound() + + return _security_info(legacy_image, parsed_args.vulnerabilities) + @resource(MANIFEST_DIGEST_ROUTE + '/security') @show_if(features.SECURITY_SCANNER) @@ -94,12 +86,12 @@ class RepositoryManifestSecurity(RepositoryParamResource): @query_param('vulnerabilities', 'Include vulnerabilities informations', type=truthy_bool, default=False) def get(self, namespace, repository, manifestref, parsed_args): - try: - tag_manifest = model.tag.load_manifest_by_digest(namespace, repository, manifestref) - except model.DataModelException: + repo_ref = registry_model.lookup_repository(namespace, repository) + if repo_ref is None: raise NotFound() - repo_image = tag_manifest.tag.image + manifest = registry_model.lookup_manifest_by_digest(repo_ref, manifestref, allow_dead=True) + if manifest is None: + raise NotFound() - return _security_status_for_image(namespace, repository, repo_image, - parsed_args.vulnerabilities) + return _security_info(manifest, parsed_args.vulnerabilities) diff --git a/util/secscan/api.py b/util/secscan/api.py index 25ef36807..cc6e282f2 100644 --- a/util/secscan/api.py +++ b/util/secscan/api.py @@ -7,9 +7,10 @@ from urlparse import urljoin import requests -from data.database import CloseForLongOperation from data import model +from data.database import CloseForLongOperation, TagManifest, Image from data.model.storage import get_storage_locations +from data.registry_model.datatypes import Manifest, LegacyImage from util.abchelpers import nooper from util.failover import failover, FailoverException from util.secscan.validator import SecurityConfigValidator @@ -61,6 +62,12 @@ _API_METHOD_PING = 'metrics' def compute_layer_id(layer): """ Returns the ID for the layer in the security scanner. """ + # NOTE: this is temporary until we switch to Clair V3. + if isinstance(layer, Manifest): + layer = TagManifest.get(id=layer._db_id).tag.image + elif isinstance(layer, LegacyImage): + layer = Image.get(id=layer._db_id) + return '%s.%s' % (layer.docker_image_id, layer.storage.uuid) From a96c5a7f6409498bbe1137101af0cbb79bf1ba5b Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 24 Aug 2018 11:25:24 -0400 Subject: [PATCH 2/3] Optimize the new registry data model to avoid unnecessary queries --- data/model/image.py | 6 ++- data/model/tag.py | 10 +++-- data/registry_model/datatypes.py | 3 +- .../registry_model/test/test_pre_oci_model.py | 41 ++++++++++++------- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/data/model/image.py b/data/model/image.py index d7b6149a1..527f1ccb7 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -357,7 +357,11 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created def get_image(repo, docker_image_id): try: - return Image.get(Image.docker_image_id == docker_image_id, Image.repository == repo) + return (Image + .select(Image, ImageStorage) + .join(ImageStorage) + .where(Image.docker_image_id == docker_image_id, Image.repository == repo) + .get()) except Image.DoesNotExist: return None diff --git a/data/model/tag.py b/data/model/tag.py index 2c24e71d9..dbff7ff73 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -208,8 +208,9 @@ def list_active_repo_tags(repo): and (if present), their manifest. """ query = _tag_alive(RepositoryTag - .select(RepositoryTag, Image, TagManifest.digest) + .select(RepositoryTag, Image, ImageStorage, TagManifest.digest) .join(Image) + .join(ImageStorage) .where(RepositoryTag.repository == repo, RepositoryTag.hidden == False) .switch(RepositoryTag) .join(TagManifest, JOIN.LEFT_OUTER)) @@ -470,8 +471,9 @@ def get_tag_image(namespace_name, repository_name, tag_name, include_storage=Fal def list_repository_tag_history(repo_obj, page=1, size=100, specific_tag=None): query = (RepositoryTag - .select(RepositoryTag, Image) + .select(RepositoryTag, Image, ImageStorage) .join(Image) + .join(ImageStorage) .switch(RepositoryTag) .where(RepositoryTag.repository == repo_obj) .where(RepositoryTag.hidden == False) @@ -591,7 +593,9 @@ def get_active_tag(namespace, repo_name, tag_name): def get_active_tag_for_repo(repo, tag_name): try: return _tag_alive(RepositoryTag - .select() + .select(RepositoryTag, Image, ImageStorage) + .join(Image) + .join(ImageStorage) .where(RepositoryTag.name == tag_name, RepositoryTag.repository == repo)).get() except RepositoryTag.DoesNotExist: diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index 89a9ad2f2..9b6674120 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -70,7 +70,7 @@ class Manifest(datatype('Manifest', ['digest', 'manifest_bytes'])): class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'comment', 'command', - 'image_size', 'uploading'])): + 'image_size', 'aggregate_size', 'uploading'])): """ LegacyImage represents a Docker V1-style image found in a repository. """ @classmethod def for_image(cls, image, images_map=None, tags_map=None): @@ -85,6 +85,7 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen comment=image.comment, command=image.command, image_size=image.storage.image_size, + aggregate_size=image.aggregate_size, uploading=image.storage.uploading) @property diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py index a5b203c66..dfff01bbf 100644 --- a/data/registry_model/test/test_pre_oci_model.py +++ b/data/registry_model/test/test_pre_oci_model.py @@ -2,9 +2,12 @@ from datetime import datetime, timedelta import pytest +from playhouse.test_utils import assert_query_count + from data import model from data.registry_model.registry_pre_oci_model import PreOCIModel from data.registry_model.datatypes import RepositoryReference + from test.fixtures import * @pytest.fixture() @@ -95,8 +98,11 @@ def test_legacy_images(repo_namespace, repo_name, pre_oci_model): found_image = pre_oci_model.get_legacy_image(repository_ref, image.docker_image_id, include_parents=True) - assert found_image.docker_image_id == image.docker_image_id - assert found_image.parents == image.parents + with assert_query_count(4 if found_image.parents else 3): + found_image = pre_oci_model.get_legacy_image(repository_ref, image.docker_image_id, + include_parents=True) + assert found_image.docker_image_id == image.docker_image_id + assert found_image.parents == image.parents # Check that the tags list can be retrieved. assert image.tags is not None @@ -157,26 +163,32 @@ def test_manifest_labels(pre_oci_model): ]) def test_repository_tags(repo_namespace, repo_name, pre_oci_model): repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) - tags = pre_oci_model.list_repository_tags(repository_ref) - assert len(tags) + + with assert_query_count(1): + tags = pre_oci_model.list_repository_tags(repository_ref, include_legacy_images=True) + assert len(tags) for tag in tags: - found_tag = pre_oci_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) - assert found_tag == tag + with assert_query_count(2): + found_tag = pre_oci_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) + assert found_tag == tag if found_tag.legacy_image is None: continue - found_image = pre_oci_model.get_legacy_image(repository_ref, - found_tag.legacy_image.docker_image_id) - assert found_image == found_tag.legacy_image + with assert_query_count(2): + found_image = pre_oci_model.get_legacy_image(repository_ref, + found_tag.legacy_image.docker_image_id) + assert found_image == found_tag.legacy_image def test_repository_tag_history(pre_oci_model): repository_ref = pre_oci_model.lookup_repository('devtable', 'history') - history, has_more = pre_oci_model.list_repository_tag_history(repository_ref) - assert not has_more - assert len(history) == 2 + + with assert_query_count(2): + history, has_more = pre_oci_model.list_repository_tag_history(repository_ref) + assert not has_more + assert len(history) == 2 @pytest.mark.parametrize('repo_namespace, repo_name', [ @@ -199,8 +211,9 @@ def test_delete_tags(repo_namespace, repo_name, pre_oci_model): assert pre_oci_model.delete_tag(repository_ref, tag.name) # Make sure the tag is no longer found. - found_tag = pre_oci_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) - assert found_tag is None + with assert_query_count(1): + found_tag = pre_oci_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) + assert found_tag is None # Ensure all tags have been deleted. tags = pre_oci_model.list_repository_tags(repository_ref) From a9ebb183f9c90fb734e8a2fd159f3723fd54558c Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 24 Aug 2018 11:25:36 -0400 Subject: [PATCH 3/3] Change repositories API endpoint to use the new registry data model --- endpoints/api/repository_models_pre_oci.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/endpoints/api/repository_models_pre_oci.py b/endpoints/api/repository_models_pre_oci.py index 7c60138f3..0ef62773c 100644 --- a/endpoints/api/repository_models_pre_oci.py +++ b/endpoints/api/repository_models_pre_oci.py @@ -5,6 +5,8 @@ from datetime import datetime, timedelta from auth.permissions import ReadRepositoryPermission from data import model from data.appr_model import channel as channel_model, release as release_model +from data.registry_model import registry_model +from data.registry_model.datatypes import RepositoryReference from endpoints.appr.models_cnr import model as appr_model from endpoints.api.repository_models_interface import RepositoryDataInterface, RepositoryBaseElement, Repository, \ ApplicationRepository, ImageRepositoryRepository, Tag, Channel, Release, Count @@ -154,13 +156,16 @@ class PreOCIModel(RepositoryDataInterface): for release in releases ]) - tags = model.tag.list_active_repo_tags(repo) + repo_ref = RepositoryReference.for_repo_obj(repo) + tags = registry_model.list_repository_tags(repo_ref, include_legacy_images=True) + start_date = datetime.now() - timedelta(days=MAX_DAYS_IN_3_MONTHS) counts = model.log.get_repository_action_counts(repo, start_date) return ImageRepositoryRepository(base, [ - Tag(tag.name, tag.image.docker_image_id, tag.image.aggregate_size, tag.lifetime_start_ts, - tag.tagmanifest.digest if hasattr(tag, 'tagmanifest') else None, + Tag(tag.name, tag.legacy_image.docker_image_id, tag.legacy_image.aggregate_size, + tag.lifetime_start_ts, + tag.manifest_digest, tag.lifetime_end_ts) for tag in tags ], [Count(count.date, count.count) for count in counts], repo.badge_token, repo.trust_enabled)