100 lines
2.8 KiB
Python
100 lines
2.8 KiB
Python
|
import marisa_trie
|
||
|
import os
|
||
|
import tarfile
|
||
|
import StringIO
|
||
|
import traceback
|
||
|
|
||
|
# Entries whose file name (or whole relative path) starts with this prefix are
# dropped from the merged output without being recorded.
AUFS_METADATA = u'.wh..wh.'

# An entry named ``.wh.<name>`` is a whiteout: it is not emitted itself, and
# ``<name>`` (in the same directory) is recorded as already encountered.
AUFS_WHITEOUT = u'.wh.'
AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)


class StreamLayerMerger(object):
    """ Class which creates a generator of the combined TAR data for a set of Docker layers.

    Layers are consumed in the order produced by ``layer_iterator``. A path is
    emitted only if no earlier layer already produced it or whited it out, so
    layers that come first take precedence (presumably the iterator yields the
    newest layer first -- confirm against the caller).
    """

    # TAR data is organised in 512 byte records.
    _TAR_BLOCK_SIZE = 512

    # 9MB (+ padding below) so that it matches the 10MB expected by Gzip.
    _CHUNK_SIZE = 1024 * 1024 * 9

    def __init__(self, layer_iterator):
        """ Creates a merger.

        layer_iterator: callable taking no arguments and returning an iterable
        of file-like objects, each holding the TAR data of one layer.
        """
        # Paths seen in the layers that have been *fully* processed so far.
        self.trie = marisa_trie.Trie()
        self.layer_iterator = layer_iterator
        # Every path seen so far, including those of the layer in progress.
        self.encountered = []

    def get_generator(self):
        """ Yields the merged TAR stream as a sequence of byte strings.

        For every kept entry the header block is yielded first, then (for
        regular files) the contents in chunks, then zero padding up to the next
        512 byte boundary. Two empty 512 byte records terminate the stream.
        """
        for current_layer in self.layer_iterator():
            # Read the current layer as TAR. If it is empty, we just continue
            # to the next layer.
            try:
                tar_file = tarfile.open(mode='r|*', fileobj=current_layer)
            except tarfile.ReadError:
                continue

            # Close the layer stream even if reading fails or the consumer
            # stops iterating the generator early.
            try:
                # For each of the tar entries, yield them IF and ONLY IF we
                # have not encountered the path before.
                for tar_info in tar_file:
                    result = self.process_tar_info(tar_info)
                    if not result:
                        continue

                    (entry, has_contents) = result
                    yield entry.tobuf()

                    if has_contents:
                        for block in self._stream_contents(tar_file, entry):
                            yield block
            finally:
                tar_file.close()

            # Update the trie with the new encountered entries. This happens
            # only once the layer is finished, so entries of a layer are never
            # filtered by paths recorded from that same layer.
            self.trie = marisa_trie.Trie(self.encountered)

        # Last two records are empty in TAR spec.
        yield b'\0' * self._TAR_BLOCK_SIZE
        yield b'\0' * self._TAR_BLOCK_SIZE

    def _stream_contents(self, tar_file, tar_info):
        """ Yields the contents of a file entry in chunks, followed by TAR padding. """
        length = 0
        file_stream = tar_file.extractfile(tar_info)
        try:
            while True:
                current_block = file_stream.read(self._CHUNK_SIZE)
                if not current_block:
                    break

                yield current_block
                length += len(current_block)
        finally:
            file_stream.close()

        # Files must be padded to 512 byte multiples.
        remainder = length % self._TAR_BLOCK_SIZE
        if remainder != 0:
            yield b'\0' * (self._TAR_BLOCK_SIZE - remainder)

    def process_tar_info(self, tar_info):
        """ Decides whether a TAR entry belongs in the merged output.

        Returns ``None`` when the entry must be skipped; otherwise a tuple
        ``(tar_info, has_contents)`` where ``has_contents`` is True for regular
        files and False for directories, symlinks and hard links.

        Side effect: the entry's path (or, for a whiteout, the path it removes)
        is appended to ``self.encountered``.
        """
        name = tar_info.name
        if isinstance(name, bytes):
            # Byte names are decoded; names that are already text are used
            # as-is instead of being pushed through ``decode`` again.
            name = name.decode('utf-8')

        # Normalised path relative to the layer root, e.g. ``etc/passwd``.
        absolute = os.path.relpath(name, './')
        (dirname, filename) = os.path.split(absolute)

        # Skip metadata.
        if filename.startswith(AUFS_METADATA) or absolute.startswith(AUFS_METADATA):
            return None

        if filename.startswith(AUFS_WHITEOUT):
            # Record the removed path in the same relative form as ``absolute``
            # so that the membership test below matches it in later layers. (A
            # leading '/' here would never match any looked-up path.)
            # NOTE(review): only the exact path is recorded; entries beneath a
            # removed directory still pass the exact-match test below --
            # confirm whether prefix matching is required.
            removed_filename = filename[AUFS_WHITEOUT_PREFIX_LENGTH:]
            self.encountered.append(os.path.join(dirname, removed_filename))
            return None

        # Check if this file has already been encountered somewhere. If so,
        # skip it.
        if absolute in self.trie:
            return None

        self.encountered.append(absolute)

        if tar_info.isdir() or tar_info.issym() or tar_info.islnk():
            return (tar_info, False)

        if tar_info.isfile():
            return (tar_info, True)

        # Any other entry type is recorded above but not emitted.
        return None