Delete the image diff feature

Fixes #1077
This commit is contained in:
Joseph Schorr 2015-12-23 13:08:01 -05:00
parent d6fd9a07c2
commit ab166c4448
14 changed files with 3 additions and 413 deletions

1
app.py
View file

@ -155,7 +155,6 @@ dex_login = DexOAuthConfig(app.config, 'DEX_LOGIN_CONFIG')
oauth_apps = [github_login, github_trigger, gitlab_trigger, google_login, dex_login]
image_diff_queue = WorkQueue(app.config['DIFFS_QUEUE_NAME'], tf)
image_replication_queue = WorkQueue(app.config['REPLICATION_QUEUE_NAME'], tf)
dockerfile_build_queue = WorkQueue(app.config['DOCKERFILE_BUILD_QUEUE_NAME'], tf,
reporter=MetricQueueReporter(metric_queue))

View file

@ -127,7 +127,6 @@ class DefaultConfig(object):
STATUS_TAGS[tag_name] = tag_svg.read()
NOTIFICATION_QUEUE_NAME = 'notification'
DIFFS_QUEUE_NAME = 'imagediff'
DOCKERFILE_BUILD_QUEUE_NAME = 'dockerfilebuild'
REPLICATION_QUEUE_NAME = 'imagestoragereplication'
SECSCAN_NOTIFICATION_QUEUE_NAME = 'secscan_notification'

View file

@ -1,10 +1,8 @@
""" List and lookup repository images, and download image diffs. """
""" List and lookup repository images. """
import json
from collections import defaultdict
from app import storage as store
from endpoints.api import (resource, nickname, require_repo_read, RepositoryParamResource,
format_date, NotFound, path_param)
from data import model
@ -104,27 +102,3 @@ class RepositoryImage(RepositoryParamResource):
return historical_image_view(image, image_map)
@resource('/v1/repository/<repopath:repository>/image/<image_id>/changes')
@path_param('repository', 'The full path of the repository. e.g. namespace/name')
@path_param('image_id', 'The Docker image ID')
class RepositoryImageChanges(RepositoryParamResource):
""" Resource for handling repository image change lists. """
@cache_control_flask_restful(max_age=60*60) # Cache for one hour
@require_repo_read
@nickname('getImageChanges')
def get(self, namespace, repository, image_id):
""" Get the list of changes for the specified image. """
image = model.image.get_repo_image_extended(namespace, repository, image_id)
if not image:
raise NotFound()
diffs_path = store.image_file_diffs_path(image.storage.uuid)
try:
response_json = json.loads(store.get_content(image.storage.locations, diffs_path))
return response_json
except IOError:
raise NotFound()

View file

@ -12,7 +12,6 @@ from auth.auth import process_auth, extract_namespace_repo_from_session
from auth.auth_context import get_authenticated_user
from auth.registry_jwt_auth import get_granted_username
from digest import checksums
from util.registry import changes
from util.http import abort, exact_abort
from util.registry.filelike import SocketReader
from auth.permissions import (ReadRepositoryPermission,
@ -482,58 +481,3 @@ def put_image_json(namespace, repository, image_id):
return make_response('true', 200)
def process_image_changes(namespace, repository, image_id):
logger.debug('Generating diffs for image: %s', image_id)
repo_image = model.image.get_repo_image_extended(namespace, repository, image_id)
if not repo_image:
logger.warning('No image for id: %s', image_id)
return None, None
uuid = repo_image.storage.uuid
image_diffs_path = store.image_file_diffs_path(uuid)
image_trie_path = store.image_file_trie_path(uuid)
if store.exists(repo_image.storage.locations, image_diffs_path):
logger.debug('Diffs already exist for image: %s', image_id)
return image_trie_path, repo_image.storage.locations
image = model.image.get_image_by_id(namespace, repository, image_id)
parents = model.image.get_parent_images(namespace, repository, image)
# Compute the diffs and fs for the parent first if necessary
parent_trie_path = None
if parents:
parent_trie_path, parent_locations = process_image_changes(namespace, repository,
parents[0].docker_image_id)
# Read in the collapsed layer state of the filesystem for the parent
parent_trie = changes.empty_fs()
if parent_trie_path:
parent_trie_bytes = store.get_content(parent_locations, parent_trie_path)
parent_trie.frombytes(parent_trie_bytes)
# Read in the file entries from the layer tar file
layer_path = model.storage.get_layer_path(repo_image.storage)
with store.stream_read_file(image.storage.locations, layer_path) as layer_tar_stream:
removed_files = set()
layer_files = changes.files_and_dirs_from_tar(layer_tar_stream,
removed_files)
new_metadata = changes.compute_new_diffs_and_fs(parent_trie, layer_files, removed_files)
(new_trie, added, changed, removed) = new_metadata
# Write out the new trie
store.put_content(image.storage.locations, image_trie_path, new_trie.tobytes())
# Write out the diffs
diffs = {}
sections = ('added', 'changed', 'removed')
for section, source_trie in zip(sections, new_metadata[1:]):
diffs[section] = list(source_trie)
diffs[section].sort()
store.put_content(image.storage.locations, image_diffs_path, json.dumps(diffs, indent=2))
return image_trie_path, image.storage.locations

View file

@ -118,13 +118,6 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map):
model.image.set_image_size(docker_image_id, repo.namespace_user.username, repo.name,
compressed_size, int(compressed_size * 1.4))
# Populate the diff file
diff_path = store.image_file_diffs_path(new_image.storage.uuid)
source_diff = SAMPLE_DIFFS[image_num % len(SAMPLE_DIFFS)]
with open(source_diff, 'r') as source_file:
store.stream_write(new_image_locations, diff_path, source_file)
parent = new_image
if last_node_tags:

View file

@ -112,19 +112,6 @@ casper.thenOpen(rootUrl + 'repository/devtable/' + repo + '?tab=tags', function(
this.wait(1000);
});
casper.then(function() {
this.log('Generating repository image diff screenshot.');
});
casper.thenClick('.image-link a', function() {
this.waitForText('Image Layers');
this.wait(1000);
});
casper.then(function() {
this.capture(outputDir + 'repo-changes.png');
});
casper.then(function() {
this.log('Generating organization view screenshot.');
});

View file

@ -41,7 +41,7 @@
Like to use <b>Dockerfiles</b> to build your images? Simply upload your Dockerfile (and any additional files it needs) and we'll build your Dockerfile into an image and push it to your repository.
</div>
<div class="tour-section-description">
If you store your Dockerfile in <i class="fa fa-github fa-lg" style="margin: 6px;"></i><b>GitHub</b>, add a <b>Build Trigger</b> to your repository and we'll start a Dockerfile build for every change you make.
If you store your Dockerfile in <i class="fa fa-github fa-lg" style="margin: 6px;"></i><b>GitHub</b>, <b>Bitbucket</b>, or <b>Gitlab</b>, add a <b>Build Trigger</b> to your repository and we'll start a Dockerfile build for every change you make.
</div>
</div>
</div>
@ -60,16 +60,6 @@
</div>
</div>
<div class="tour-section row">
<div class="col-md-7"><img src="/static/img/repo-changes.png" title="View Image" data-screenshot-url="https://quay.io/repository/devtable/image/..." class="img-responsive"></div>
<div class="col-md-5">
<div class="tour-section-title">Docker diff whenever you need it</div>
<div class="tour-section-description">
We wanted to know what was changing in each image of our repositories just as much as you do. So we added diffs. Now you can see exactly which files were <b>added</b>, <b>changed</b>, or <b>removed</b> for each image. We've also provided two awesome ways to view your changes, either in a Dockerfile-like view, or in a drill down tree view.
</div>
</div>
</div>
<div class="row testimonial" quay-require="['BILLING']">
<div class="tour-action" quay-require="['BILLING']">
<a href="/plans?trial-plan=personal">

View file

@ -85,35 +85,6 @@
});
};
$scope.downloadChanges = function() {
if ($scope.changesResource) { return; }
var params = {
'repository': namespace + '/' + name,
'image_id': imageid
};
$scope.changesResource = ApiService.getImageChangesAsResource(params).get(function(changes) {
var combinedChanges = [];
var addCombinedChanges = function(c, kind) {
for (var i = 0; i < c.length; ++i) {
combinedChanges.push({
'kind': kind,
'file': c[i]
});
}
};
addCombinedChanges(changes.added, 'added');
addCombinedChanges(changes.removed, 'removed');
addCombinedChanges(changes.changed, 'changed');
$scope.combinedChanges = combinedChanges;
$scope.imageChanges = changes;
$scope.initializeTree();
});
};
$scope.initializeTree = function() {
if ($scope.tree || !$scope.combinedChanges.length) { return; }

View file

@ -21,10 +21,6 @@
<span class="cor-tab" tab-active="true" tab-title="Layers" tab-target="#layers">
<i class="fa ci-layers"></i>
</span>
<span class="cor-tab" tab-title="Changes" tab-target="#changes"
tab-init="downloadChanges()">
<i class="fa fa-code-fork"></i>
</span>
<span class="cor-tab" tab-title="Security Scan" tab-target="#security"
tab-init="loadImageVulnerabilities()"
quay-show="Features.SECURITY_SCANNER">
@ -46,24 +42,6 @@
ng-repeat="parent in reversedHistory"></div>
</div>
<!-- Changes -->
<div id="changes" class="tab-pane">
<div class="resource-view" resource="changesResource" error-message="'Could not load image changes'">
<h3>Image File Changes</h3>
<div class="empty" ng-if="!combinedChanges.length">
<div class="empty-primary-msg">No file changes</div>
<div class="empty-secondary-msg">
There were no file system changes in this image layer.
</div>
</div>
<div ng-show="combinedChanges.length">
<div id="changes-tree-container" class="changes-container"
onresize="tree && tree.notifyResized()"></div>
</div>
</div>
</div>
<!-- Security -->
<div id="security" class="tab-pane" quay-require="['SECURITY_SCANNER']">
<div class="resource-view" resource="vulnerabilitiesResource" error-message="'Could not load security information for image'">

View file

@ -35,10 +35,6 @@ class StoragePaths(object):
base_path = self._image_path(storage_uuid)
return '{0}files.trie'.format(base_path)
def image_file_diffs_path(self, storage_uuid):
base_path = self._image_path(storage_uuid)
return '{0}diffs.json'.format(base_path)
class BaseStorage(StoragePaths):
def __init__(self):

View file

@ -13,7 +13,7 @@ from endpoints.api import api_bp, api
from endpoints.api.team import TeamMember, TeamMemberList, OrganizationTeam, TeamMemberInvite
from endpoints.api.tag import RepositoryTagImages, RepositoryTag, ListRepositoryTags, RevertTag
from endpoints.api.search import EntitySearch
from endpoints.api.image import RepositoryImageChanges, RepositoryImage, RepositoryImageList
from endpoints.api.image import RepositoryImage, RepositoryImageList
from endpoints.api.build import (FileDropResource, RepositoryBuildStatus, RepositoryBuildLogs,
RepositoryBuildList, RepositoryBuildResource)
from endpoints.api.robot import (UserRobotList, OrgRobot, OrgRobotList, UserRobot,
@ -1592,60 +1592,6 @@ class TestBuildTriggerAnalyze0byeBuynlargeOrgrepo(ApiTestCase):
self._run_test('POST', 404, 'devtable', {'config': {}})
class TestRepositoryImageChangesPtsgPublicPublicrepo(ApiTestCase):
def setUp(self):
ApiTestCase.setUp(self)
self._set_url(RepositoryImageChanges, image_id="PTSG", repository="public/publicrepo")
def test_get_anonymous(self):
self._run_test('GET', 404, None, None)
def test_get_freshuser(self):
self._run_test('GET', 404, 'freshuser', None)
def test_get_reader(self):
self._run_test('GET', 404, 'reader', None)
def test_get_devtable(self):
self._run_test('GET', 404, 'devtable', None)
class TestRepositoryImageChangesPtsgDevtableShared(ApiTestCase):
def setUp(self):
ApiTestCase.setUp(self)
self._set_url(RepositoryImageChanges, image_id="PTSG", repository="devtable/shared")
def test_get_anonymous(self):
self._run_test('GET', 401, None, None)
def test_get_freshuser(self):
self._run_test('GET', 403, 'freshuser', None)
def test_get_reader(self):
self._run_test('GET', 404, 'reader', None)
def test_get_devtable(self):
self._run_test('GET', 404, 'devtable', None)
class TestRepositoryImageChangesPtsgBuynlargeOrgrepo(ApiTestCase):
def setUp(self):
ApiTestCase.setUp(self)
self._set_url(RepositoryImageChanges, image_id="PTSG", repository="buynlarge/orgrepo")
def test_get_anonymous(self):
self._run_test('GET', 401, None, None)
def test_get_freshuser(self):
self._run_test('GET', 403, 'freshuser', None)
def test_get_reader(self):
self._run_test('GET', 404, 'reader', None)
def test_get_devtable(self):
self._run_test('GET', 404, 'devtable', None)
class TestRepositoryBuildStatusFg86PublicPublicrepo(ApiTestCase):
def setUp(self):
ApiTestCase.setUp(self)

View file

@ -1,73 +0,0 @@
import unittest
import tarfile
from StringIO import StringIO
from util.registry.streamlayerformat import AUFS_WHITEOUT
from util.registry.changes import files_and_dirs_from_tar
class TestChanges(unittest.TestCase):
def create_layer(self, **kwargs):
output = StringIO()
with tarfile.open(fileobj=output, mode='w:gz') as tar:
for current_contents in kwargs:
current_filename = kwargs[current_contents]
if current_contents == '_':
# This is a deleted file.
if current_filename.endswith('/'):
current_filename = current_filename[:-1]
parts = current_filename.split('/')
if len(parts) > 1:
current_filename = '/'.join(parts[:-1]) + '/' + AUFS_WHITEOUT + parts[-1]
else:
current_filename = AUFS_WHITEOUT + parts[-1]
current_contents = ''
info = tarfile.TarInfo(name=current_filename)
info.size = len(current_contents)
tar.addfile(info, fileobj=StringIO(current_contents))
return output.getvalue()
def test_single_layer(self):
tar_layer = self.create_layer(
foo = 'some_file',
bar = 'another_file',
meh = 'third_file')
deleted_prefixes = set()
result = list(files_and_dirs_from_tar(StringIO(tar_layer), deleted_prefixes))
self.assertIn('/some_file', result)
self.assertIn('/another_file', result)
self.assertIn('/third_file', result)
self.assertFalse(deleted_prefixes)
def test_delete_file(self):
tar_layer = self.create_layer(
foo = 'some_file',
_ = 'another_file',
meh = 'third_file')
deleted_prefixes = set()
result = list(files_and_dirs_from_tar(StringIO(tar_layer), deleted_prefixes))
self.assertIn('/some_file', result)
self.assertIn('/third_file', result)
self.assertIn('another_file', deleted_prefixes)
def test_pax_tarfile_error(self):
# If this fails it will raise an exception
with open('test/data/sample/layers/paxheader.tar') as tarstream:
result = list(files_and_dirs_from_tar(tarstream, set()))
self.assertIn('/file', result)
if __name__ == '__main__':
unittest.main()

View file

@ -1,77 +0,0 @@
import marisa_trie
import os
import util.vendor.paxtarfile as tarfile
from util.registry.aufs import is_aufs_metadata, get_deleted_prefix
ALLOWED_TYPES = {tarfile.REGTYPE, tarfile.AREGTYPE}
def files_and_dirs_from_tar(source_stream, removed_prefix_collector):
try:
tar_stream = tarfile.open(mode='r|*', fileobj=source_stream)
except tarfile.ReadError:
# Empty tar file
return
for tar_info in tar_stream:
absolute = os.path.relpath(tar_info.name.decode('utf-8'), './')
# Skip metadata.
if is_aufs_metadata(absolute):
continue
# Add prefixes of removed paths to the collector.
deleted_prefix = get_deleted_prefix(absolute)
if deleted_prefix is not None:
removed_prefix_collector.add(deleted_prefix)
continue
# Otherwise, yield the path if it is in the allowed types.
if tar_info.type in ALLOWED_TYPES:
yield '/' + absolute
def __compute_removed(base_trie, removed_prefixes):
for prefix in removed_prefixes:
for filename in base_trie.keys(prefix):
yield filename
def __compute_added_changed(base_trie, delta_trie):
added = set()
changed = set()
for filename in delta_trie.keys():
if filename not in base_trie:
added.add(filename)
else:
changed.add(filename)
return added, changed
def __new_fs(base_trie, added, removed):
for filename in base_trie.keys():
if filename not in removed:
yield filename
for filename in added:
yield filename
def empty_fs():
return marisa_trie.Trie()
def compute_new_diffs_and_fs(base_trie, filename_source,
removed_prefix_collector):
new_trie = marisa_trie.Trie(filename_source)
(new_added, new_changed) = __compute_added_changed(base_trie, new_trie)
new_removed = marisa_trie.Trie(__compute_removed(base_trie,
removed_prefix_collector))
new_fs = marisa_trie.Trie(__new_fs(base_trie, new_added, new_removed))
return (new_fs, new_added, new_changed, new_removed.keys())

View file

@ -1,37 +0,0 @@
import logging
from app import image_diff_queue
from data import model
from endpoints.v1.registry import process_image_changes
from workers.queueworker import QueueWorker
logger = logging.getLogger(__name__)
class DiffsWorker(QueueWorker):
def process_queue_item(self, job_details):
image_id = job_details['image_id']
repository = job_details['repository']
namespace = model.user.get_namespace_by_user_id(job_details['namespace_user_id'])
try:
process_image_changes(namespace, repository, image_id)
except model.DataModelException:
# This exception is unrecoverable, and the item should continue and be
# marked as complete.
msg = ('Image does not exist in database \'%s\' for repo \'%s/\'%s\'' %
(image_id, namespace, repository))
logger.warning(msg)
except IOError:
# This exception is unrecoverable, and the item should continue and be
# marked as complete.
msg = ("Data could not be retrieved for image %s under repo %s/%s" %
(image_id, namespace, repository))
logger.exception(msg)
return True
if __name__ == "__main__":
worker = DiffsWorker(image_diff_queue)
worker.start()