- Handle missing images properly

- Add support for deleting directories
- Add a slew of tests covering directory deletion, other kinds of deletion, and layering
This commit is contained in:
Joseph Schorr 2014-10-14 21:40:02 -04:00
parent eef7edab49
commit da28bc4ce9
4 changed files with 228 additions and 56 deletions

View file

@ -69,8 +69,9 @@ def get_squashed_tag(namespace, repository, tag):
permission = ReadRepositoryPermission(namespace, repository)
if permission.can() or model.repository_is_public(namespace, repository):
# Lookup the requested tag.
try:
tag_image = model.get_tag_image(namespace, repository, tag)
if not tag_image:
except model.DataModelException:
abort(404)
# Lookup the tag's image and storage.

View file

@ -8,13 +8,20 @@ class TestStreamLayerMerger(unittest.TestCase):
def create_layer(self, **kwargs):
output = StringIO()
with tarfile.open(fileobj=output, mode='w:gz') as tar:
for filename in kwargs:
current_filename = filename
current_contents = kwargs[filename]
for current_contents in kwargs:
current_filename = kwargs[current_contents]
if current_contents is None:
if current_contents == '_':
# This is a deleted file.
current_filename = AUFS_WHITEOUT + current_filename
if current_filename.endswith('/'):
current_filename = current_filename[:-1]
parts = current_filename.split('/')
if len(parts) > 1:
current_filename = '/'.join(parts[:-1]) + '/' + AUFS_WHITEOUT + parts[-1]
else:
current_filename = AUFS_WHITEOUT + parts[-1]
current_contents = ''
info = tarfile.TarInfo(name=current_filename)
@ -47,9 +54,9 @@ class TestStreamLayerMerger(unittest.TestCase):
def test_single_layer(self):
tar_layer = self.create_layer(
some_file = 'foo',
another_file = 'bar',
third_file = 'meh')
foo = 'some_file',
bar = 'another_file',
meh = 'third_file')
squashed = self.squash_layers([tar_layer])
@ -59,12 +66,12 @@ class TestStreamLayerMerger(unittest.TestCase):
def test_multiple_layers(self):
second_layer = self.create_layer(
some_file = 'foo',
another_file = 'bar',
third_file = 'meh')
foo = 'some_file',
bar = 'another_file',
meh = 'third_file')
first_layer = self.create_layer(
top_file = 'top')
top = 'top_file')
squashed = self.squash_layers([first_layer, second_layer])
@ -73,14 +80,30 @@ class TestStreamLayerMerger(unittest.TestCase):
self.assertHasFile(squashed, 'third_file', 'meh')
self.assertHasFile(squashed, 'top_file', 'top')
def test_multiple_layers_overwrite(self):
def test_multiple_layers_dot(self):
second_layer = self.create_layer(
some_file = 'foo',
another_file = 'bar',
third_file = 'meh')
foo = './some_file',
bar = 'another_file',
meh = './third_file')
first_layer = self.create_layer(
another_file = 'top')
top = 'top_file')
squashed = self.squash_layers([first_layer, second_layer])
self.assertHasFile(squashed, './some_file', 'foo')
self.assertHasFile(squashed, 'another_file', 'bar')
self.assertHasFile(squashed, './third_file', 'meh')
self.assertHasFile(squashed, 'top_file', 'top')
def test_multiple_layers_overwrite(self):
second_layer = self.create_layer(
foo = 'some_file',
bar = 'another_file',
meh = 'third_file')
first_layer = self.create_layer(
top = 'another_file')
squashed = self.squash_layers([first_layer, second_layer])
@ -88,14 +111,46 @@ class TestStreamLayerMerger(unittest.TestCase):
self.assertHasFile(squashed, 'third_file', 'meh')
self.assertHasFile(squashed, 'another_file', 'top')
def test_deleted_file(self):
def test_multiple_layers_overwrite_base_dot(self):
second_layer = self.create_layer(
some_file = 'foo',
another_file = 'bar',
third_file = 'meh')
foo = 'some_file',
bar = './another_file',
meh = 'third_file')
first_layer = self.create_layer(
another_file = None)
top = 'another_file')
squashed = self.squash_layers([first_layer, second_layer])
self.assertHasFile(squashed, 'some_file', 'foo')
self.assertHasFile(squashed, 'third_file', 'meh')
self.assertHasFile(squashed, 'another_file', 'top')
self.assertDoesNotHaveFile(squashed, './another_file')
def test_multiple_layers_overwrite_top_dot(self):
second_layer = self.create_layer(
foo = 'some_file',
bar = 'another_file',
meh = 'third_file')
first_layer = self.create_layer(
top = './another_file')
squashed = self.squash_layers([first_layer, second_layer])
self.assertHasFile(squashed, 'some_file', 'foo')
self.assertHasFile(squashed, 'third_file', 'meh')
self.assertHasFile(squashed, './another_file', 'top')
self.assertDoesNotHaveFile(squashed, 'another_file')
def test_deleted_file(self):
second_layer = self.create_layer(
foo = 'some_file',
bar = 'another_file',
meh = 'third_file')
first_layer = self.create_layer(
_ = 'another_file')
squashed = self.squash_layers([first_layer, second_layer])
@ -105,15 +160,15 @@ class TestStreamLayerMerger(unittest.TestCase):
def test_deleted_readded_file(self):
third_layer = self.create_layer(
another_file = 'bar')
bar = 'another_file')
second_layer = self.create_layer(
some_file = 'foo',
another_file = None,
third_file = 'meh')
foo = 'some_file',
_ = 'another_file',
meh = 'third_file')
first_layer = self.create_layer(
another_file = 'newagain')
newagain = 'another_file')
squashed = self.squash_layers([first_layer, second_layer, third_layer])
@ -123,15 +178,15 @@ class TestStreamLayerMerger(unittest.TestCase):
def test_deleted_in_lower_layer(self):
third_layer = self.create_layer(
deleted_file = 'bar')
bar = 'deleted_file')
second_layer = self.create_layer(
some_file = 'foo',
deleted_file = None,
third_file = 'meh')
foo = 'some_file',
_ = 'deleted_file',
meh = 'third_file')
first_layer = self.create_layer(
top_file = 'top')
top = 'top_file')
squashed = self.squash_layers([first_layer, second_layer, third_layer])
@ -140,7 +195,120 @@ class TestStreamLayerMerger(unittest.TestCase):
self.assertHasFile(squashed, 'top_file', 'top')
self.assertDoesNotHaveFile(squashed, 'deleted_file')
def test_deleted_in_lower_layer_with_added_dot(self):
third_layer = self.create_layer(
something = './deleted_file')
second_layer = self.create_layer(
_ = 'deleted_file')
squashed = self.squash_layers([second_layer, third_layer])
self.assertDoesNotHaveFile(squashed, 'deleted_file')
def test_deleted_in_lower_layer_with_deleted_dot(self):
third_layer = self.create_layer(
something = './deleted_file')
second_layer = self.create_layer(
_ = './deleted_file')
squashed = self.squash_layers([second_layer, third_layer])
self.assertDoesNotHaveFile(squashed, 'deleted_file')
def test_directory(self):
second_layer = self.create_layer(
foo = 'foo/some_file',
bar = 'foo/another_file')
first_layer = self.create_layer(
top = 'foo/some_file')
squashed = self.squash_layers([first_layer, second_layer])
self.assertHasFile(squashed, 'foo/some_file', 'top')
self.assertHasFile(squashed, 'foo/another_file', 'bar')
def test_sub_directory(self):
second_layer = self.create_layer(
foo = 'foo/some_file',
bar = 'foo/bar/another_file')
first_layer = self.create_layer(
top = 'foo/some_file')
squashed = self.squash_layers([first_layer, second_layer])
self.assertHasFile(squashed, 'foo/some_file', 'top')
self.assertHasFile(squashed, 'foo/bar/another_file', 'bar')
def test_delete_directory(self):
second_layer = self.create_layer(
foo = 'foo/some_file',
bar = 'foo/another_file')
first_layer = self.create_layer(
_ = 'foo/')
squashed = self.squash_layers([first_layer, second_layer])
self.assertDoesNotHaveFile(squashed, 'foo/some_file')
self.assertDoesNotHaveFile(squashed, 'foo/another_file')
def test_delete_sub_directory(self):
second_layer = self.create_layer(
foo = 'foo/some_file',
bar = 'foo/bar/another_file')
first_layer = self.create_layer(
_ = 'foo/bar/')
squashed = self.squash_layers([first_layer, second_layer])
self.assertDoesNotHaveFile(squashed, 'foo/bar/another_file')
self.assertHasFile(squashed, 'foo/some_file', 'foo')
def test_delete_sub_directory_with_dot(self):
second_layer = self.create_layer(
foo = 'foo/some_file',
bar = 'foo/bar/another_file')
first_layer = self.create_layer(
_ = './foo/bar/')
squashed = self.squash_layers([first_layer, second_layer])
self.assertDoesNotHaveFile(squashed, 'foo/bar/another_file')
self.assertHasFile(squashed, 'foo/some_file', 'foo')
def test_delete_sub_directory_with_subdot(self):
second_layer = self.create_layer(
foo = './foo/some_file',
bar = './foo/bar/another_file')
first_layer = self.create_layer(
_ = 'foo/bar/')
squashed = self.squash_layers([first_layer, second_layer])
self.assertDoesNotHaveFile(squashed, 'foo/bar/another_file')
self.assertDoesNotHaveFile(squashed, './foo/bar/another_file')
self.assertHasFile(squashed, './foo/some_file', 'foo')
def test_delete_directory_recreate(self):
third_layer = self.create_layer(
foo = 'foo/some_file',
bar = 'foo/another_file')
second_layer = self.create_layer(
_ = 'foo/')
first_layer = self.create_layer(
baz = 'foo/some_file')
squashed = self.squash_layers([first_layer, second_layer, third_layer])
self.assertHasFile(squashed, 'foo/some_file', 'baz')
self.assertDoesNotHaveFile(squashed, 'foo/another_file')
if __name__ == '__main__':
unittest.main()

View file

@ -4,31 +4,28 @@ AUFS_METADATA = u'.wh..wh.'
AUFS_WHITEOUT = u'.wh.'
AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)
def is_aufs_metadata(filepath):
""" Returns whether the given filepath references an AUFS metadata file. """
filename = os.path.basename(filepath)
return filename.startswith(AUFS_METADATA) or filepath.startswith(AUFS_METADATA)
def is_aufs_metadata(absolute):
""" Returns whether the given absolute path references an AUFS metadata file. """
filename = os.path.basename(absolute)
return filename.startswith(AUFS_METADATA) or absolute.startswith(AUFS_METADATA)
def get_deleted_filename(filepath):
def get_deleted_filename(absolute):
""" Returns the name of the deleted file referenced by the AUFS whiteout file at
the given path or None if the file path does not reference a whiteout file.
"""
filename = os.path.basename(filepath)
filename = os.path.basename(absolute)
if not filename.startswith(AUFS_WHITEOUT):
return None
return filename[AUFS_WHITEOUT_PREFIX_LENGTH:]
def get_deleted_prefix(filepath):
def get_deleted_prefix(absolute):
""" Returns the path prefix of the deleted file referenced by the AUFS whiteout file at
the given path or None if the file path does not reference a whiteout file.
"""
deleted_filename = get_deleted_filename(filepath)
deleted_filename = get_deleted_filename(absolute)
if deleted_filename is None:
return None
dirname = os.path.dirname(filepath)
if not dirname:
return deleted_filename
return os.path.join('/', dirname, deleted_filename)
dirname = os.path.dirname(absolute)
return os.path.join('/', dirname, deleted_filename)[1:]

View file

@ -12,9 +12,13 @@ AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)
class StreamLayerMerger(object):
""" Class which creates a generator of the combined TAR data for a set of Docker layers. """
def __init__(self, layer_iterator):
self.trie = marisa_trie.Trie()
self.path_trie = marisa_trie.Trie()
self.path_encountered = []
self.prefix_trie = marisa_trie.Trie()
self.prefix_encountered = []
self.layer_iterator = layer_iterator
self.encountered = []
def get_generator(self):
for current_layer in self.layer_iterator():
@ -60,8 +64,9 @@ class StreamLayerMerger(object):
# Close the layer stream now that we're done with it.
tar_file.close()
# Update the trie with the new encountered entries.
self.trie = marisa_trie.Trie(self.encountered)
# Update the tries.
self.path_trie = marisa_trie.Trie(self.path_encountered)
self.prefix_trie = marisa_trie.Trie(self.prefix_encountered)
# Last two records are empty in TAR spec.
yield '\0' * 512
@ -78,14 +83,15 @@ class StreamLayerMerger(object):
# Add any prefix of deleted paths to the prefix list.
deleted_prefix = get_deleted_prefix(absolute)
if deleted_prefix is not None:
self.encountered.append(deleted_prefix)
self.prefix_encountered.append(deleted_prefix)
return False
# Check if this file has already been encountered somewhere. If so,
# skip it.
if unicode(absolute) in self.trie:
ubsolute = unicode(absolute)
if ubsolute in self.path_trie or any(self.prefix_trie.iter_prefixes(ubsolute)):
return False
# Otherwise, add the path to the encountered list and return it.
self.encountered.append(absolute)
self.path_encountered.append(absolute)
return True