- Merge branch 'master' into sha-lom
- Extract the tar handling from streamlayerformat into tarlayerformat
- Add a new tarfileappender class to make it easy to append data to gzipped tars
- Fix the gzipwrap so that it closes properly
- Have the .git injection use the new appender
This commit is contained in:
		
						commit
						d43109d7cb
					
				
					 48 changed files with 1232 additions and 532 deletions
				
			
		|  | @ -2,72 +2,29 @@ import marisa_trie | |||
| import os | ||||
| import tarfile | ||||
| from aufs import is_aufs_metadata, get_deleted_prefix | ||||
| 
 | ||||
| from util.tarlayerformat import TarLayerFormat | ||||
| 
 | ||||
| AUFS_METADATA = u'.wh..wh.' | ||||
| 
 | ||||
| AUFS_WHITEOUT = u'.wh.' | ||||
| AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT) | ||||
| 
 | ||||
| class StreamLayerMerger(object): | ||||
| class StreamLayerMerger(TarLayerFormat): | ||||
|   """ Class which creates a generator of the combined TAR data for a set of Docker layers. """ | ||||
|   def __init__(self, layer_iterator): | ||||
|     self.trie = marisa_trie.Trie() | ||||
|     self.layer_iterator = layer_iterator | ||||
|     self.encountered = [] | ||||
|     super(StreamLayerMerger, self).__init__(layer_iterator) | ||||
| 
 | ||||
|   def get_generator(self): | ||||
|     for current_layer in self.layer_iterator(): | ||||
|       # Read the current layer as TAR. If it is empty, we just continue | ||||
|       # to the next layer. | ||||
|       try: | ||||
|         tar_file = tarfile.open(mode='r|*', fileobj=current_layer) | ||||
|       except tarfile.ReadError as re: | ||||
|         continue | ||||
|     self.path_trie = marisa_trie.Trie() | ||||
|     self.path_encountered = [] | ||||
| 
 | ||||
|       # For each of the tar entries, yield them IF and ONLY IF we have not | ||||
|       # encountered the path before. | ||||
|     self.prefix_trie = marisa_trie.Trie() | ||||
|     self.prefix_encountered = [] | ||||
| 
 | ||||
|       # 9MB (+ padding below) so that it matches the 10MB expected by Gzip. | ||||
|       chunk_size = 1024 * 1024 * 9 | ||||
| 
 | ||||
|       for tar_info in tar_file: | ||||
|         if not self.check_tar_info(tar_info): | ||||
|           continue | ||||
| 
 | ||||
|         # Yield the tar header. | ||||
|         yield tar_info.tobuf() | ||||
| 
 | ||||
|         # Try to extract any file contents for the tar. If found, we yield them as well. | ||||
|         if tar_info.isreg(): | ||||
|           file_stream = tar_file.extractfile(tar_info) | ||||
|           if file_stream is not None: | ||||
|             length = 0 | ||||
|             while True: | ||||
|               current_block = file_stream.read(chunk_size) | ||||
|               if not len(current_block): | ||||
|                 break | ||||
| 
 | ||||
|               yield current_block | ||||
|               length += len(current_block) | ||||
| 
 | ||||
|             file_stream.close() | ||||
| 
 | ||||
|             # Files must be padded to 512 byte multiples. | ||||
|             if length % 512 != 0: | ||||
|               yield '\0' * (512 - (length % 512)) | ||||
| 
 | ||||
|       # Close the layer stream now that we're done with it. | ||||
|       tar_file.close() | ||||
| 
 | ||||
|       # Update the trie with the new encountered entries. | ||||
|       self.trie = marisa_trie.Trie(self.encountered) | ||||
|   def after_tar_layer(stream, current_layer): | ||||
|       # Update the tries. | ||||
|       self.path_trie = marisa_trie.Trie(self.path_encountered) | ||||
|       self.prefix_trie = marisa_trie.Trie(self.prefix_encountered) | ||||
|        | ||||
|     # Last two records are empty in TAR spec. | ||||
|     yield '\0' * 512 | ||||
|     yield '\0' * 512 | ||||
| 
 | ||||
| 
 | ||||
|   def check_tar_info(self, tar_info): | ||||
|     absolute = os.path.relpath(tar_info.name.decode('utf-8'), './') | ||||
| 
 | ||||
|  | @ -76,16 +33,22 @@ class StreamLayerMerger(object): | |||
|       return False | ||||
| 
 | ||||
|     # Add any prefix of deleted paths to the prefix list. | ||||
|     deleted_prefix = get_deleted_prefix(absolute) | ||||
|     deleted_prefix = get_deleted_prefix(absolute)     | ||||
|     if deleted_prefix is not None: | ||||
|       self.encountered.append(deleted_prefix) | ||||
|       self.prefix_encountered.append(deleted_prefix) | ||||
|       return False | ||||
| 
 | ||||
|     # Check if this file has already been encountered somewhere. If so, | ||||
|     # skip it. | ||||
|     if unicode(absolute) in self.trie: | ||||
|     ubsolute = unicode(absolute) | ||||
|     if ubsolute in self.path_trie: | ||||
|       return False | ||||
| 
 | ||||
|     # Check if this file is under a deleted path. | ||||
|     for prefix in self.prefix_trie.iter_prefixes(ubsolute): | ||||
|       if not os.path.relpath(ubsolute, prefix).startswith('..'): | ||||
|         return False | ||||
| 
 | ||||
|     # Otherwise, add the path to the encountered list and return it. | ||||
|     self.encountered.append(absolute) | ||||
|     self.path_encountered.append(absolute) | ||||
|     return True | ||||
|  |  | |||
		Reference in a new issue