Retarget hard links pointing to deleted files by emitting the deleted file contents under the first hard link instance. This fixes a breakage in the squashed TAR where we were pointing hard links to missing data.

Fixes https://jira.coreos.com/browse/QUAY-885
This commit is contained in:
Joseph Schorr 2018-03-21 16:05:27 -04:00
parent 041a7fcd36
commit 110366f656
8 changed files with 337 additions and 252 deletions

View file

@@ -1,53 +1,70 @@
import marisa_trie
import os
import tarfile
import marisa_trie
from util.registry.aufs import is_aufs_metadata, get_deleted_prefix
from util.registry.tarlayerformat import TarLayerFormat
# Prefix used by AUFS for its own internal metadata entries; these should
# never be surfaced in a merged layer.
AUFS_METADATA = u'.wh..wh.'

# Prefix marking an AUFS "whiteout" entry, i.e. a file deleted in this layer.
AUFS_WHITEOUT = u'.wh.'
AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)
class StreamLayerMerger(TarLayerFormat):
    """ Class which creates a generator of the combined TAR data for a set of Docker layers. """

    def __init__(self, get_tar_stream_iterator, path_prefix=None):
        super(StreamLayerMerger, self).__init__(get_tar_stream_iterator, path_prefix)

        # Trie of every path emitted in previously-processed layers, plus the
        # backing set it is rebuilt from after each layer (marisa tries are
        # immutable, so we accumulate in a set and rebuild).
        self.path_trie = marisa_trie.Trie()
        self.path_encountered = set()

        # Trie of every deleted-path prefix (AUFS whiteouts) seen in
        # previously-processed layers, plus its backing set.
        self.deleted_prefix_trie = marisa_trie.Trie()
        self.deleted_prefixes_encountered = set()

    def after_tar_layer(self):
        # Update the tries so that lookups in later (older) layers reflect all
        # paths and deletions recorded while streaming this layer.
        self.path_trie = marisa_trie.Trie(self.path_encountered)
        self.deleted_prefix_trie = marisa_trie.Trie(self.deleted_prefixes_encountered)

    @staticmethod
    def _normalize_path(path):
        # Normalize (e.g. './foo/bar' -> 'foo/bar') so that trie membership and
        # prefix checks are consistent regardless of how the TAR spells paths.
        # NOTE: expects a bytestring (Python 2) and decodes it as UTF-8.
        return os.path.relpath(path.decode('utf-8'), './')

    def _check_deleted(self, absolute):
        # Returns True if `absolute` falls under any previously-deleted prefix.
        # iter_prefixes yields candidate ancestors; the relpath check rules out
        # false prefix matches such as 'foo' vs 'foobar'.
        ubsolute = unicode(absolute)
        for prefix in self.deleted_prefix_trie.iter_prefixes(ubsolute):
            if not os.path.relpath(ubsolute, prefix).startswith('..'):
                return True

        return False

    def is_skipped_file(self, filename):
        """ Returns True if the file at `filename` should be omitted from the
            merged TAR entirely: AUFS metadata, files under a deleted path, or
            paths already emitted by a newer layer.
        """
        absolute = StreamLayerMerger._normalize_path(filename)

        # Skip metadata.
        if is_aufs_metadata(absolute):
            return True

        # Check if the file is under a deleted path.
        if self._check_deleted(absolute):
            return True

        # Check if this file has already been encountered somewhere. If so,
        # skip it.
        ubsolute = unicode(absolute)
        if ubsolute in self.path_trie:
            return True

        return False

    def should_append_file(self, filename):
        """ Returns True if the file at `filename` should be appended to the
            merged TAR. Whiteout markers are recorded (so later layers skip the
            deleted subtree) but not themselves appended.
        """
        if self.is_skipped_file(filename):
            return False

        absolute = StreamLayerMerger._normalize_path(filename)

        # Add any prefix of deleted paths to the prefix list.
        deleted_prefix = get_deleted_prefix(absolute)
        if deleted_prefix is not None:
            self.deleted_prefixes_encountered.add(deleted_prefix)
            return False

        # Otherwise, add the path to the encountered list and return it.
        self.path_encountered.add(absolute)
        return True