From ab166c4448f19428cc925a9215ec681f9caae794 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 23 Dec 2015 13:08:01 -0500 Subject: [PATCH] Delete the image diff feature Fixes #1077 --- app.py | 1 - config.py | 1 - endpoints/api/image.py | 28 +---------- endpoints/v1/registry.py | 56 --------------------- initdb.py | 7 --- screenshots/screenshots.js | 13 ----- static/directives/tour-content.html | 12 +---- static/js/pages/image-view.js | 29 ----------- static/partials/image-view.html | 22 --------- storage/basestorage.py | 4 -- test/test_api_security.py | 56 +-------------------- test/test_changes.py | 73 --------------------------- util/registry/changes.py | 77 ----------------------------- workers/diffsworker.py | 37 -------------- 14 files changed, 3 insertions(+), 413 deletions(-) delete mode 100644 test/test_changes.py delete mode 100644 util/registry/changes.py delete mode 100644 workers/diffsworker.py diff --git a/app.py b/app.py index 2c96969af..beb4c027f 100644 --- a/app.py +++ b/app.py @@ -155,7 +155,6 @@ dex_login = DexOAuthConfig(app.config, 'DEX_LOGIN_CONFIG') oauth_apps = [github_login, github_trigger, gitlab_trigger, google_login, dex_login] -image_diff_queue = WorkQueue(app.config['DIFFS_QUEUE_NAME'], tf) image_replication_queue = WorkQueue(app.config['REPLICATION_QUEUE_NAME'], tf) dockerfile_build_queue = WorkQueue(app.config['DOCKERFILE_BUILD_QUEUE_NAME'], tf, reporter=MetricQueueReporter(metric_queue)) diff --git a/config.py b/config.py index 460dbe986..175efd3bc 100644 --- a/config.py +++ b/config.py @@ -127,7 +127,6 @@ class DefaultConfig(object): STATUS_TAGS[tag_name] = tag_svg.read() NOTIFICATION_QUEUE_NAME = 'notification' - DIFFS_QUEUE_NAME = 'imagediff' DOCKERFILE_BUILD_QUEUE_NAME = 'dockerfilebuild' REPLICATION_QUEUE_NAME = 'imagestoragereplication' SECSCAN_NOTIFICATION_QUEUE_NAME = 'secscan_notification' diff --git a/endpoints/api/image.py b/endpoints/api/image.py index 057e02cae..55c8da0a2 100644 --- a/endpoints/api/image.py +++ b/endpoints/api/image.py @@ -1,10 +1,8 @@ -""" List and lookup repository images, and download image diffs. """ +""" List and lookup repository images. """ import json from collections import defaultdict - -from app import storage as store from endpoints.api import (resource, nickname, require_repo_read, RepositoryParamResource, format_date, NotFound, path_param) from data import model @@ -104,27 +102,3 @@ class RepositoryImage(RepositoryParamResource): return historical_image_view(image, image_map) - -@resource('/v1/repository//image//changes') -@path_param('repository', 'The full path of the repository. e.g. namespace/name') -@path_param('image_id', 'The Docker image ID') -class RepositoryImageChanges(RepositoryParamResource): - """ Resource for handling repository image change lists. """ - - @cache_control_flask_restful(max_age=60*60) # Cache for one hour - @require_repo_read - @nickname('getImageChanges') - def get(self, namespace, repository, image_id): - """ Get the list of changes for the specified image. """ - image = model.image.get_repo_image_extended(namespace, repository, image_id) - - if not image: - raise NotFound() - - diffs_path = store.image_file_diffs_path(image.storage.uuid) - - try: - response_json = json.loads(store.get_content(image.storage.locations, diffs_path)) - return response_json - except IOError: - raise NotFound() diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index f91dcfcd9..e7557468c 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -12,7 +12,6 @@ from auth.auth import process_auth, extract_namespace_repo_from_session from auth.auth_context import get_authenticated_user from auth.registry_jwt_auth import get_granted_username from digest import checksums -from util.registry import changes from util.http import abort, exact_abort from util.registry.filelike import SocketReader from auth.permissions import (ReadRepositoryPermission, @@ -482,58 +481,3 @@ def put_image_json(namespace, repository, image_id): return make_response('true', 200) - -def process_image_changes(namespace, repository, image_id): - logger.debug('Generating diffs for image: %s', image_id) - - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - if not repo_image: - logger.warning('No image for id: %s', image_id) - return None, None - - uuid = repo_image.storage.uuid - - image_diffs_path = store.image_file_diffs_path(uuid) - image_trie_path = store.image_file_trie_path(uuid) - - if store.exists(repo_image.storage.locations, image_diffs_path): - logger.debug('Diffs already exist for image: %s', image_id) - return image_trie_path, repo_image.storage.locations - - image = model.image.get_image_by_id(namespace, repository, image_id) - parents = model.image.get_parent_images(namespace, repository, image) - - # Compute the diffs and fs for the parent first if necessary - parent_trie_path = None - if parents: - parent_trie_path, parent_locations = process_image_changes(namespace, repository, - parents[0].docker_image_id) - - # Read in the collapsed layer state of the filesystem for the parent - parent_trie = changes.empty_fs() - if parent_trie_path: - parent_trie_bytes = store.get_content(parent_locations, parent_trie_path) - parent_trie.frombytes(parent_trie_bytes) - - # Read in the file entries from the layer tar file - layer_path = model.storage.get_layer_path(repo_image.storage) - with store.stream_read_file(image.storage.locations, layer_path) as layer_tar_stream: - removed_files = set() - layer_files = changes.files_and_dirs_from_tar(layer_tar_stream, - removed_files) - - new_metadata = changes.compute_new_diffs_and_fs(parent_trie, layer_files, removed_files) - (new_trie, added, changed, removed) = new_metadata - - # Write out the new trie - store.put_content(image.storage.locations, image_trie_path, new_trie.tobytes()) - - # Write out the diffs - diffs = {} - sections = ('added', 'changed', 'removed') - for section, source_trie in zip(sections, new_metadata[1:]): - diffs[section] = list(source_trie) - diffs[section].sort() - store.put_content(image.storage.locations, image_diffs_path, json.dumps(diffs, indent=2)) - - return image_trie_path, image.storage.locations diff --git a/initdb.py b/initdb.py index f75420911..b37ec9dcd 100644 --- a/initdb.py +++ b/initdb.py @@ -118,13 +118,6 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map): model.image.set_image_size(docker_image_id, repo.namespace_user.username, repo.name, compressed_size, int(compressed_size * 1.4)) - # Populate the diff file - diff_path = store.image_file_diffs_path(new_image.storage.uuid) - source_diff = SAMPLE_DIFFS[image_num % len(SAMPLE_DIFFS)] - - with open(source_diff, 'r') as source_file: - store.stream_write(new_image_locations, diff_path, source_file) - parent = new_image if last_node_tags: diff --git a/screenshots/screenshots.js b/screenshots/screenshots.js index 16401d510..b464b101c 100644 --- a/screenshots/screenshots.js +++ b/screenshots/screenshots.js @@ -112,19 +112,6 @@ casper.thenOpen(rootUrl + 'repository/devtable/' + repo + '?tab=tags', function( this.wait(1000); }); -casper.then(function() { - this.log('Generating repository image diff screenshot.'); -}); - -casper.thenClick('.image-link a', function() { - this.waitForText('Image Layers'); - this.wait(1000); -}); - -casper.then(function() { - this.capture(outputDir + 'repo-changes.png'); -}); - casper.then(function() { this.log('Generating organization view screenshot.'); }); diff --git a/static/directives/tour-content.html b/static/directives/tour-content.html index ad5b90a70..1dc0f4f6e 100644 --- a/static/directives/tour-content.html +++ b/static/directives/tour-content.html @@ -41,7 +41,7 @@ Like to use Dockerfiles to build your images? Simply upload your Dockerfile (and any additional files it needs) and we'll build your Dockerfile into an image and push it to your repository.
- If you store your Dockerfile in GitHub, add a Build Trigger to your repository and we'll start a Dockerfile build for every change you make. + If you store your Dockerfile in GitHub, Bitbucket, or Gitlab, add a Build Trigger to your repository and we'll start a Dockerfile build for every change you make.
@@ -60,16 +60,6 @@ -
-
-
-
Docker diff whenever you need it
-
- We wanted to know what was changing in each image of our repositories just as much as you do. So we added diffs. Now you can see exactly which files were added, changed, or removed for each image. We've also provided two awesome ways to view your changes, either in a Dockerfile-like view, or in a drill down tree view. -
-
-
- - -
-
-

Image File Changes

-
-
No file changes
-
- There were no file system changes in this image layer. -
-
- -
-
-
-
-
-
diff --git a/storage/basestorage.py b/storage/basestorage.py index dee4937c9..fd9ca30e1 100644 --- a/storage/basestorage.py +++ b/storage/basestorage.py @@ -35,10 +35,6 @@ class StoragePaths(object): base_path = self._image_path(storage_uuid) return '{0}files.trie'.format(base_path) - def image_file_diffs_path(self, storage_uuid): - base_path = self._image_path(storage_uuid) - return '{0}diffs.json'.format(base_path) - class BaseStorage(StoragePaths): def __init__(self): diff --git a/test/test_api_security.py b/test/test_api_security.py index a50a21bda..8df483c69 100644 --- a/test/test_api_security.py +++ b/test/test_api_security.py @@ -13,7 +13,7 @@ from endpoints.api import api_bp, api from endpoints.api.team import TeamMember, TeamMemberList, OrganizationTeam, TeamMemberInvite from endpoints.api.tag import RepositoryTagImages, RepositoryTag, ListRepositoryTags, RevertTag from endpoints.api.search import EntitySearch -from endpoints.api.image import RepositoryImageChanges, RepositoryImage, RepositoryImageList +from endpoints.api.image import RepositoryImage, RepositoryImageList from endpoints.api.build import (FileDropResource, RepositoryBuildStatus, RepositoryBuildLogs, RepositoryBuildList, RepositoryBuildResource) from endpoints.api.robot import (UserRobotList, OrgRobot, OrgRobotList, UserRobot, @@ -1592,60 +1592,6 @@ class TestBuildTriggerAnalyze0byeBuynlargeOrgrepo(ApiTestCase): self._run_test('POST', 404, 'devtable', {'config': {}}) -class TestRepositoryImageChangesPtsgPublicPublicrepo(ApiTestCase): - def setUp(self): - ApiTestCase.setUp(self) - self._set_url(RepositoryImageChanges, image_id="PTSG", repository="public/publicrepo") - - def test_get_anonymous(self): - self._run_test('GET', 404, None, None) - - def test_get_freshuser(self): - self._run_test('GET', 404, 'freshuser', None) - - def test_get_reader(self): - self._run_test('GET', 404, 'reader', None) - - def test_get_devtable(self): - self._run_test('GET', 404, 'devtable', None) - - -class TestRepositoryImageChangesPtsgDevtableShared(ApiTestCase): - def setUp(self): - ApiTestCase.setUp(self) - self._set_url(RepositoryImageChanges, image_id="PTSG", repository="devtable/shared") - - def test_get_anonymous(self): - self._run_test('GET', 401, None, None) - - def test_get_freshuser(self): - self._run_test('GET', 403, 'freshuser', None) - - def test_get_reader(self): - self._run_test('GET', 404, 'reader', None) - - def test_get_devtable(self): - self._run_test('GET', 404, 'devtable', None) - - -class TestRepositoryImageChangesPtsgBuynlargeOrgrepo(ApiTestCase): - def setUp(self): - ApiTestCase.setUp(self) - self._set_url(RepositoryImageChanges, image_id="PTSG", repository="buynlarge/orgrepo") - - def test_get_anonymous(self): - self._run_test('GET', 401, None, None) - - def test_get_freshuser(self): - self._run_test('GET', 403, 'freshuser', None) - - def test_get_reader(self): - self._run_test('GET', 404, 'reader', None) - - def test_get_devtable(self): - self._run_test('GET', 404, 'devtable', None) - - class TestRepositoryBuildStatusFg86PublicPublicrepo(ApiTestCase): def setUp(self): ApiTestCase.setUp(self) diff --git a/test/test_changes.py b/test/test_changes.py deleted file mode 100644 index ca0b4f0d2..000000000 --- a/test/test_changes.py +++ /dev/null @@ -1,73 +0,0 @@ -import unittest -import tarfile - -from StringIO import StringIO -from util.registry.streamlayerformat import AUFS_WHITEOUT -from util.registry.changes import files_and_dirs_from_tar - -class TestChanges(unittest.TestCase): - def create_layer(self, **kwargs): - output = StringIO() - with tarfile.open(fileobj=output, mode='w:gz') as tar: - for current_contents in kwargs: - current_filename = kwargs[current_contents] - - if current_contents == '_': - # This is a deleted file. - if current_filename.endswith('/'): - current_filename = current_filename[:-1] - - parts = current_filename.split('/') - if len(parts) > 1: - current_filename = '/'.join(parts[:-1]) + '/' + AUFS_WHITEOUT + parts[-1] - else: - current_filename = AUFS_WHITEOUT + parts[-1] - - current_contents = '' - - info = tarfile.TarInfo(name=current_filename) - info.size = len(current_contents) - tar.addfile(info, fileobj=StringIO(current_contents)) - - return output.getvalue() - - - def test_single_layer(self): - tar_layer = self.create_layer( - foo = 'some_file', - bar = 'another_file', - meh = 'third_file') - - deleted_prefixes = set() - result = list(files_and_dirs_from_tar(StringIO(tar_layer), deleted_prefixes)) - - self.assertIn('/some_file', result) - self.assertIn('/another_file', result) - self.assertIn('/third_file', result) - - self.assertFalse(deleted_prefixes) - - - def test_delete_file(self): - tar_layer = self.create_layer( - foo = 'some_file', - _ = 'another_file', - meh = 'third_file') - - deleted_prefixes = set() - result = list(files_and_dirs_from_tar(StringIO(tar_layer), deleted_prefixes)) - - self.assertIn('/some_file', result) - self.assertIn('/third_file', result) - - self.assertIn('another_file', deleted_prefixes) - - def test_pax_tarfile_error(self): - # If this fails it will raise an exception - with open('test/data/sample/layers/paxheader.tar') as tarstream: - result = list(files_and_dirs_from_tar(tarstream, set())) - - self.assertIn('/file', result) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/util/registry/changes.py b/util/registry/changes.py deleted file mode 100644 index 0e7f31c80..000000000 --- a/util/registry/changes.py +++ /dev/null @@ -1,77 +0,0 @@ -import marisa_trie -import os - -import util.vendor.paxtarfile as tarfile - -from util.registry.aufs import is_aufs_metadata, get_deleted_prefix - -ALLOWED_TYPES = {tarfile.REGTYPE, tarfile.AREGTYPE} - -def files_and_dirs_from_tar(source_stream, removed_prefix_collector): - try: - tar_stream = tarfile.open(mode='r|*', fileobj=source_stream) - except tarfile.ReadError: - # Empty tar file - return - - for tar_info in tar_stream: - absolute = os.path.relpath(tar_info.name.decode('utf-8'), './') - - # Skip metadata. - if is_aufs_metadata(absolute): - continue - - # Add prefixes of removed paths to the collector. - deleted_prefix = get_deleted_prefix(absolute) - if deleted_prefix is not None: - removed_prefix_collector.add(deleted_prefix) - continue - - # Otherwise, yield the path if it is in the allowed types. - if tar_info.type in ALLOWED_TYPES: - yield '/' + absolute - - -def __compute_removed(base_trie, removed_prefixes): - for prefix in removed_prefixes: - for filename in base_trie.keys(prefix): - yield filename - - -def __compute_added_changed(base_trie, delta_trie): - added = set() - changed = set() - - for filename in delta_trie.keys(): - if filename not in base_trie: - added.add(filename) - else: - changed.add(filename) - - return added, changed - - -def __new_fs(base_trie, added, removed): - for filename in base_trie.keys(): - if filename not in removed: - yield filename - - for filename in added: - yield filename - - -def empty_fs(): - return marisa_trie.Trie() - - -def compute_new_diffs_and_fs(base_trie, filename_source, - removed_prefix_collector): - new_trie = marisa_trie.Trie(filename_source) - (new_added, new_changed) = __compute_added_changed(base_trie, new_trie) - - new_removed = marisa_trie.Trie(__compute_removed(base_trie, - removed_prefix_collector)) - - new_fs = marisa_trie.Trie(__new_fs(base_trie, new_added, new_removed)) - - return (new_fs, new_added, new_changed, new_removed.keys()) diff --git a/workers/diffsworker.py b/workers/diffsworker.py deleted file mode 100644 index 406e3564e..000000000 --- a/workers/diffsworker.py +++ /dev/null @@ -1,37 +0,0 @@ -import logging - -from app import image_diff_queue -from data import model -from endpoints.v1.registry import process_image_changes -from workers.queueworker import QueueWorker - - -logger = logging.getLogger(__name__) - - -class DiffsWorker(QueueWorker): - def process_queue_item(self, job_details): - image_id = job_details['image_id'] - repository = job_details['repository'] - namespace = model.user.get_namespace_by_user_id(job_details['namespace_user_id']) - - try: - process_image_changes(namespace, repository, image_id) - except model.DataModelException: - # This exception is unrecoverable, and the item should continue and be - # marked as complete. - msg = ('Image does not exist in database \'%s\' for repo \'%s/\'%s\'' % - (image_id, namespace, repository)) - logger.warning(msg) - except IOError: - # This exception is unrecoverable, and the item should continue and be - # marked as complete. - msg = ("Data could not be retrieved for image %s under repo %s/%s" % - (image_id, namespace, repository)) - logger.exception(msg) - - return True - -if __name__ == "__main__": - worker = DiffsWorker(image_diff_queue) - worker.start()