import marisa_trie
import os
import tarfile

from aufs import is_aufs_metadata, get_deleted_prefix
from util.tarlayerformat import TarLayerFormat

# AUFS marker prefixes. They are not referenced by the class below;
# presumably kept for code that imports them from this module -- verify
# before removing.
AUFS_METADATA = u'.wh..wh.'
AUFS_WHITEOUT = u'.wh.'
AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)


class StreamLayerMerger(TarLayerFormat):
    """ Class which creates a generator of the combined TAR data for a set of
        Docker layers.

        The merger records every path it has accepted and every deleted-path
        prefix it has seen. Both lists are only turned into searchable tries
        in `after_tar_layer`, so lookups made by `check_tar_info` reflect the
        state as of the most recent `after_tar_layer` call.
    """

    def __init__(self, layer_iterator, path_prefix=None):
        """ Initializes the merger with empty path/prefix bookkeeping.

            `layer_iterator` and `path_prefix` are forwarded unchanged to
            TarLayerFormat.
        """
        super(StreamLayerMerger, self).__init__(layer_iterator, path_prefix)

        # Paths accepted so far, and the trie snapshot used for lookups.
        self.path_trie = marisa_trie.Trie()
        self.path_encountered = []

        # Deleted-path prefixes seen so far, and their trie snapshot.
        self.prefix_trie = marisa_trie.Trie()
        self.prefix_encountered = []

    def after_tar_layer(self, current_layer):
        """ Rebuilds both lookup tries from the accumulated lists.

            `current_layer` is not used here. Presumably invoked by
            TarLayerFormat once a layer has been fully processed -- confirm
            against the base class.
        """
        # Update the tries.
        self.path_trie = marisa_trie.Trie(self.path_encountered)
        self.prefix_trie = marisa_trie.Trie(self.prefix_encountered)

    def check_tar_info(self, tar_info):
        """ Decides whether the given TAR entry passes the merge filters.

            Returns False for AUFS metadata entries, for deletion markers
            (whose prefix is recorded), for paths already present in the path
            trie and for paths located under a recorded deleted prefix.
            Returns True otherwise, after recording the path. Presumably a
            True result makes the base class emit the entry -- confirm
            against TarLayerFormat.
        """
        # `tar_info.name` is expected to be a UTF-8 encoded byte string;
        # normalize it to a unicode path relative to the archive root.
        absolute = os.path.relpath(tar_info.name.decode('utf-8'), './')

        # Skip metadata.
        if is_aufs_metadata(absolute):
            return False

        # Add any prefix of deleted paths to the prefix list.
        deleted_prefix = get_deleted_prefix(absolute)
        if deleted_prefix is not None:
            self.prefix_encountered.append(deleted_prefix)
            return False

        # Check if this file has already been encountered somewhere. If so,
        # skip it.
        ubsolute = unicode(absolute)
        if ubsolute in self.path_trie:
            return False

        # Check if this file is under a deleted path. The trie yields string
        # prefixes, which may end in the middle of a path component (e.g.
        # u'a' for u'ab/c'), so each candidate is confirmed via relpath.
        parent_dir_prefix = os.pardir + os.sep
        for prefix in self.prefix_trie.iter_prefixes(ubsolute):
            relative = os.path.relpath(ubsolute, prefix)

            # Only a leading '..' *component* means the path lies outside the
            # prefix. A plain startswith('..') would also match entries such
            # as '..cache' that really are inside the deleted directory.
            is_outside = (relative == os.pardir or
                          relative.startswith(parent_dir_prefix))
            if not is_outside:
                return False

        # Otherwise, add the path to the encountered list and return it.
        self.path_encountered.append(absolute)
        return True