Complete the diff generating functionality.

yackob03 2013-10-18 14:31:14 -04:00
parent decb324411
commit a1164269be
6 changed files with 113 additions and 32 deletions

View file

@@ -1,5 +1,6 @@
 import logging
 import stripe
+import re

 from flask import request, make_response, jsonify, abort
 from flask.ext.login import login_required, current_user, logout_user
@@ -7,6 +8,8 @@ from flask.ext.principal import identity_changed, AnonymousIdentity
 from functools import wraps
 from collections import defaultdict

+import storage
+
 from data import model
 from app import app
 from util.email import send_confirmation_email, send_recovery_email
@@ -17,8 +20,9 @@ from auth.permissions import (ReadRepositoryPermission,
                               AdministerRepositoryPermission)
 from endpoints import registry
 from endpoints.web import common_login
-import re
+
+store = storage.load()

 logger = logging.getLogger(__name__)
@@ -366,11 +370,13 @@ def list_repository_images(namespace, repository):
 def get_repository_changes(namespace, repository, image_id):
   permission = ReadRepositoryPermission(namespace, repository)
   if permission.can() or model.repository_is_public(namespace, repository):
-    return jsonify({
-      'added': [],
-      'changed': [],
-      'removed': []
-    })
+    diffs_path = store.image_file_diffs_path(namespace, repository, image_id)
+
+    try:
+      response_json = store.get_content(diffs_path)
+      return make_response(response_json)
+    except IOError:
+      abort(404)

   abort(403)
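For context, a minimal sketch (Python 2, with hypothetical file names) of the JSON document this endpoint now serves back verbatim from storage; process_image_changes below writes it with json.dumps(indent=2):

import json

# Hypothetical diff document, shaped like the one process_image_changes stores.
response_json = json.dumps({
  'added': ['etc/hosts'],
  'changed': ['usr/bin/python'],
  'removed': ['tmp/build.log'],
}, indent=2)

# A client can decode it back into the three sections the old stub hardcoded.
diffs = json.loads(response_json)
for section in ('added', 'changed', 'removed'):
  print '%s: %s' % (section, diffs[section])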

View file

@@ -1,4 +1,3 @@
 import logging
 import json
-
@@ -11,7 +10,7 @@ import storage
 from app import app
 from auth.auth import process_auth, extract_namespace_repo_from_session
-from util import checksums
+from util import checksums, changes
 from auth.permissions import (ReadRepositoryPermission,
                               ModifyRepositoryPermission)
 from data import model
@@ -315,6 +314,49 @@ def delete_repository_storage(namespace, repository):
 def process_image_changes(namespace, repository, image_id):
-  return
+  logger.debug('Generating diffs for image: %s' % image_id)
+
+  image_diffs_path = store.image_file_diffs_path(namespace, repository,
+                                                 image_id)
+
+  if store.exists(image_diffs_path):
+    logger.debug('Diffs already exist for image: %s' % image_id)
+    return

   image = model.get_image_by_id(namespace, repository, image_id)
-  model.get_parent_images()
+  parents = model.get_parent_images(image)
+
+  # Compute the diffs and fs for the parent first if necessary
+  parent_trie_path = None
+  if parents:
+    parent_trie_path = process_image_changes(namespace, repository,
+                                             parents[-1].docker_image_id)
+
+  # Read in the collapsed layer state of the filesystem for the parent
+  parent_trie = changes.empty_fs()
+  if parent_trie_path:
+    with store.stream_read_file(parent_trie_path) as parent_trie_stream:
+      parent_trie.read(parent_trie_stream)
+
+  # Read in the file entries from the layer tar file
+  layer_path = store.image_layer_path(namespace, repository, image_id)
+  with store.stream_read_file(layer_path) as layer_tar_stream:
+    removed_files = set()
+    layer_files = changes.files_and_dirs_from_tar(layer_tar_stream,
+                                                  removed_files)
+
+    new_metadata = changes.compute_new_diffs_and_fs(parent_trie, layer_files,
+                                                    removed_files)
+
+  (new_trie, added, changed, removed) = new_metadata
+
+  # Write out the new trie
+  new_trie_path = store.image_file_trie_path(namespace, repository, image_id)
+  store.put_content(new_trie_path, new_trie.tobytes())
+
+  # Write out the diffs
+  diffs = {}
+  sections = ('added', 'changed', 'removed')
+  for section, source_trie in zip(sections, new_metadata[1:]):
+    diffs[section] = list(source_trie)
+
+  store.put_content(image_diffs_path, json.dumps(diffs, indent=2))
+
+  return new_trie_path
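A sketch of the call order the recursion above produces, using a hypothetical three-image chain; each image is diffed against its parent's fully collapsed filesystem, so the oldest ancestor must be processed first:

# Hypothetical parent links: leaf -> middle -> base.
parents_of = {'leaf': 'middle', 'middle': 'base', 'base': None}

def process(image_id):
  if parents_of[image_id]:
    process(parents_of[image_id])  # the parent trie must exist before the child's
  print 'process_image_changes(%s)' % image_id

process('leaf')
# Prints: base, then middle, then leaf.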

View file

@@ -58,9 +58,9 @@ class Storage(object):
     return '{0}/{1}/{2}/{3}/files.trie'.format(self.images, namespace,
                                                repository, image_id)

-  def image_file_diffs_trie_path(self, namespace, repository, image_id):
-    return '{0}/{1}/{2}/{3}/diffs.pkl'.format(self.images, namespace,
-                                              repository, image_id)
+  def image_file_diffs_path(self, namespace, repository, image_id):
+    return '{0}/{1}/{2}/{3}/diffs.json'.format(self.images, namespace,
+                                               repository, image_id)

   def get_content(self, path):
     raise NotImplementedError
@@ -71,6 +71,9 @@ class Storage(object):
   def stream_read(self, path):
     raise NotImplementedError

+  def stream_read_file(self, path):
+    raise NotImplementedError
+
   def stream_write(self, path, fp):
     raise NotImplementedError
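As an illustration, what these path templates expand to; the 'images' prefix here is a hypothetical value for self.images:

# Stand-ins for the Storage path templates, with self.images assumed 'images'.
images, ns, repo, image_id = 'images', 'ns', 'repo', 'abc123'
print '{0}/{1}/{2}/{3}/files.trie'.format(images, ns, repo, image_id)
print '{0}/{1}/{2}/{3}/diffs.json'.format(images, ns, repo, image_id)
# images/ns/repo/abc123/files.trie
# images/ns/repo/abc123/diffs.json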

View file

@@ -38,6 +38,10 @@ class LocalStorage(Storage):
         break
       yield buf

+  def stream_read_file(self, path):
+    path = self._init_path(path)
+    return open(path, mode='rb')
+
   def stream_write(self, path, fp):
     # Size is mandatory
     path = self._init_path(path, create=True)
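Since this returns the bare file object from open(), it already supports the with-statement used by process_image_changes; a minimal sketch with a hypothetical path:

# Create a throwaway file so the read below succeeds.
path = '/tmp/files.trie'
open(path, 'wb').close()

# Equivalent to: with store.stream_read_file(path) as stream: ...
with open(path, mode='rb') as stream:
  contents = stream.read()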

View file

@@ -60,6 +60,13 @@ class S3Storage(Storage):
         break
       yield buf

+  def stream_read_file(self, path):
+    path = self._init_path(path)
+    key = boto.s3.key.Key(self._s3_bucket, path)
+    if not key.exists():
+      raise IOError('No such key: \'{0}\''.format(path))
+    return key
+
   def stream_write(self, path, fp):
     # Minimum size of upload part size on S3 is 5MB
     buffer_size = 5 * 1024 * 1024
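Raising IOError here, rather than a boto-specific exception, mirrors what LocalStorage's open() raises for a missing path, which is what lets the API endpoint above turn either backend's miss into a 404. A usage sketch with boto, assuming a hypothetical already-connected bucket object:

import boto.s3.key

def read_whole_key(s3_bucket, path):
  # Key.exists() performs a HEAD request; the returned Key is file-like.
  key = boto.s3.key.Key(s3_bucket, path)
  if not key.exists():
    raise IOError('No such key: \'{0}\''.format(path))
  return key.read()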

View file

@@ -1,5 +1,6 @@
 import marisa_trie
 import os
+import tarfile

 AUFS_METADATA = u'.wh..wh.'
@@ -8,33 +9,34 @@ AUFS_WHITEOUT = u'.wh.'
 AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)


-def files_and_dirs(root_path, removed_prefix_collector):
-  for root, dirs, files in os.walk(unicode(root_path)):
+def files_and_dirs_from_tar(source_stream, removed_prefix_collector):
+  tar_stream = tarfile.open(mode='r|*', fileobj=source_stream)

-    rel_root = os.path.relpath(root, root_path)
-    yield rel_root
+  for tar_info in tar_stream:
+    absolute = os.path.relpath(unicode(tar_info.name), './')
+    filename = os.path.basename(absolute)

-    for one_file in files:
-      if one_file.startswith(AUFS_METADATA):
-        # Skip
-        continue
+    if (filename.startswith(AUFS_METADATA) or
+        absolute.startswith(AUFS_METADATA)):
+      # Skip
+      continue

-      elif one_file.startswith(AUFS_WHITEOUT):
-        filename = one_file[AUFS_WHITEOUT_PREFIX_LENGTH:]
-        removed_prefix_collector.add(os.path.join(rel_root, filename))
-        continue
+    elif filename.startswith(AUFS_WHITEOUT):
+      filename = filename[AUFS_WHITEOUT_PREFIX_LENGTH:]
+      removed_prefix_collector.add(absolute)
+      continue

-      else:
-        yield os.path.join(rel_root, one_file)
+    else:
+      yield absolute


-def compute_removed(base_trie, removed_prefixes):
+def __compute_removed(base_trie, removed_prefixes):
   for prefix in removed_prefixes:
     for filename in base_trie.keys(prefix):
       yield filename


-def compute_added_changed(base_trie, delta_trie):
+def __compute_added_changed(base_trie, delta_trie):
   added = set()
   changed = set()
@@ -47,10 +49,27 @@ def compute_added_changed(base_trie, delta_trie):
   return added, changed


-def new_fs(base_trie, added, removed):
+def __new_fs(base_trie, added, removed):
   for filename in base_trie.keys():
     if filename not in removed:
       yield filename

   for filename in added:
     yield filename
+
+
+def empty_fs():
+  return marisa_trie.Trie()
+
+
+def compute_new_diffs_and_fs(base_trie, filename_source,
+                             removed_prefix_collector):
+  new_trie = marisa_trie.Trie(filename_source)
+
+  (new_added, new_changed) = __compute_added_changed(base_trie, new_trie)
+
+  new_removed = marisa_trie.Trie(__compute_removed(base_trie,
+                                                   removed_prefix_collector))
+
+  new_fs = marisa_trie.Trie(__new_fs(base_trie, new_added, new_removed))
+
+  return (new_fs, new_added, new_changed, new_removed.keys())
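An end-to-end sketch of the new tar-based pipeline (Python 2; assumes the marisa_trie package is installed and the module above is importable as util.changes): build a tiny layer tar in memory, stream its entries, and diff against an empty parent filesystem, so every entry should surface as added.

import io
import tarfile

from util import changes

# Build a fake layer tar with a directory entry and one file.
buf = io.BytesIO()
layer = tarfile.open(mode='w', fileobj=buf)
for name in ('./etc', './etc/hosts'):
  layer.addfile(tarfile.TarInfo(name))
layer.close()
buf.seek(0)

removed = set()
entries = changes.files_and_dirs_from_tar(buf, removed)

# Diff against an empty parent; the generator is consumed when the
# new layer trie is built inside compute_new_diffs_and_fs.
new_fs, added, changed, removed_files = changes.compute_new_diffs_and_fs(
    changes.empty_fs(), entries, removed)

print sorted(added)   # expected: [u'etc', u'etc/hosts']
print new_fs.keys()   # the collapsed filesystem trie for this layer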