84 lines
2.1 KiB
Python
84 lines
2.1 KiB
Python
import marisa_trie
|
|
import os
|
|
import tarfile
|
|
|
|
|
|
# Tar entries whose file name (or whole relative path) starts with this prefix
# are skipped entirely by files_and_dirs_from_tar.
AUFS_METADATA = u'.wh..wh.'

# Tar entries whose file name starts with this prefix are treated as removal
# markers: the prefix is stripped and the remaining path is recorded as removed.
AUFS_WHITEOUT = u'.wh.'

# Number of leading characters to strip from a whiteout file name.
AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)

# Tar member types that are reported as files; every other type is ignored.
ALLOWED_TYPES = set([tarfile.REGTYPE, tarfile.AREGTYPE])
|
|
|
|
|
|
def files_and_dirs_from_tar(source_stream, removed_prefix_collector):
    """Yield the absolute path of each regular file found in a tar stream.

    The stream is read sequentially (``r|*``), so ``source_stream`` only needs
    to support reading. Entries are classified by name:

    * names (or whole relative paths) starting with ``AUFS_METADATA`` are
      skipped;
    * names starting with ``AUFS_WHITEOUT`` are not yielded; instead the path
      with the whiteout prefix stripped is added to
      ``removed_prefix_collector``;
    * any other entry is yielded as ``'/' + relative_path`` when its tar type
      is in ``ALLOWED_TYPES``; entries of other types are ignored.

    Args:
        source_stream: file-like object positioned at the start of a tar
            archive (optionally compressed).
        removed_prefix_collector: object with an ``add(path)`` method that
            receives every whited-out path. It is filled lazily, while the
            generator is being consumed.

    Yields:
        Text paths beginning with ``/``.

    If the archive cannot be opened (``tarfile.ReadError``, e.g. an empty
    stream) the generator simply yields nothing.
    """
    try:
        tar_stream = tarfile.open(mode='r|*', fileobj=source_stream)
    except tarfile.ReadError:
        # Empty tar file
        return

    try:
        for tar_info in tar_stream:
            name = tar_info.name
            if isinstance(name, bytes):
                # Python 2 hands back byte strings; Python 3 already returns
                # text, which has no .decode().
                name = name.decode('utf-8')

            # Normalise away a leading './' so paths are relative to the root.
            absolute = os.path.relpath(name, './')
            dirname = os.path.dirname(absolute)
            filename = os.path.basename(absolute)

            # Skip metadata entries.
            if (filename.startswith(AUFS_METADATA) or
                    absolute.startswith(AUFS_METADATA)):
                continue

            # Whiteout entry: record the path it hides instead of yielding it.
            if filename.startswith(AUFS_WHITEOUT):
                removed_filename = filename[AUFS_WHITEOUT_PREFIX_LENGTH:]
                removed_prefix = os.path.join('/', dirname, removed_filename)
                removed_prefix_collector.add(removed_prefix)
                continue

            if tar_info.type in ALLOWED_TYPES:
                yield '/' + absolute
    finally:
        # Release the TarFile even if the consumer stops iterating early.
        # The caller-supplied source_stream itself is left open.
        tar_stream.close()
|
|
|
|
|
|
def __compute_removed(base_trie, removed_prefixes):
    """Yield every file name in ``base_trie`` hidden by a removed path.

    A removed path hides the entry with exactly that name and every entry
    located underneath it (``path + '/...'``). Entries that merely share the
    same leading characters, such as ``/etc/foobar`` for the removed path
    ``/etc/foo``, are left alone.

    Args:
        base_trie: object whose ``keys(prefix)`` returns the stored names that
            start with ``prefix``.
        removed_prefixes: iterable of removed paths.

    Yields:
        Names from ``base_trie``; a name may be yielded more than once when
        several removed paths cover it.
    """
    for prefix in removed_prefixes:
        # Avoid a doubled separator when the path already ends with '/'.
        directory_prefix = prefix.rstrip('/') + '/'

        # The trie query narrows the candidates cheaply; the check below
        # enforces path-component boundaries on top of it.
        for filename in base_trie.keys(prefix):
            if filename == prefix or filename.startswith(directory_prefix):
                yield filename
|
|
|
|
|
|
def __compute_added_changed(base_trie, delta_trie):
    """Classify the names in ``delta_trie`` against ``base_trie``.

    Args:
        base_trie: container supporting ``name in base_trie``.
        delta_trie: object whose ``keys()`` returns the names to classify.

    Returns:
        A ``(added, changed)`` tuple of sets: ``added`` holds the delta names
        missing from ``base_trie``, ``changed`` holds the delta names that are
        already present in it.
    """
    delta_names = set(delta_trie.keys())
    added = set(name for name in delta_names if name not in base_trie)
    # Whatever is not new must already exist in the base.
    changed = delta_names - added
    return added, changed
|
|
|
|
|
|
def __new_fs(base_trie, added, removed):
    """Yield the file names making up the resulting filesystem.

    First every name from ``base_trie.keys()`` that is not contained in
    ``removed`` is produced, followed by every name in ``added``.

    Args:
        base_trie: object whose ``keys()`` returns the existing names.
        added: iterable of names to append after the surviving base names.
        removed: container supporting ``name in removed``.
    """
    surviving = (name for name in base_trie.keys() if name not in removed)
    for source in (surviving, added):
        for name in source:
            yield name
|
|
|
|
|
|
def empty_fs():
    """Return a freshly constructed ``marisa_trie.Trie`` with no keys."""
    blank_trie = marisa_trie.Trie()
    return blank_trie
|
|
|
|
|
|
def compute_new_diffs_and_fs(base_trie, filename_source,
                             removed_prefix_collector):
    """Apply one set of file names and removals on top of ``base_trie``.

    Args:
        base_trie: trie of the file names that existed before.
        filename_source: iterable of file names; it is fully consumed to build
            a trie before anything else happens.
        removed_prefix_collector: iterable of removed paths. It is read only
            after ``filename_source`` has been exhausted.
            NOTE(review): presumably this ordering lets a lazy source such as
            files_and_dirs_from_tar fill the collector while it is consumed;
            confirm against callers.

    Returns:
        A tuple ``(new_fs, added, changed, removed)`` where ``new_fs`` is a
        trie of the resulting file names, ``added`` and ``changed`` are sets,
        and ``removed`` is the key list of the trie of removed names.
    """
    # Must come first: consuming the source may populate the collector.
    delta_trie = marisa_trie.Trie(filename_source)
    added, changed = __compute_added_changed(base_trie, delta_trie)

    removed_names = __compute_removed(base_trie, removed_prefix_collector)
    removed_trie = marisa_trie.Trie(removed_names)

    resulting_names = __new_fs(base_trie, added, removed_trie)
    resulting_trie = marisa_trie.Trie(resulting_names)

    return (resulting_trie, added, changed, removed_trie.keys())
|