# This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
# quay/util/registry/tarlayerformat.py
# 2015-08-03 16:04:19 -04:00
#
# 94 lines
# 3 KiB
# Python

import os
import tarfile
import copy
class TarLayerReadException(Exception):
    """ Exception raised when reading a layer has failed. """
class TarLayerFormat(object):
    """ Class which creates a generator of the combined TAR data.

        `tar_iterator` is a callable which returns an iterable of file-like objects, each one
        containing the (possibly compressed) TAR stream of a single layer. If `path_prefix` is
        given, it is prepended to the name of every entry (and to the target of every hard link)
        written to the combined TAR.

        Subclasses must implement `check_tar_info` and `after_tar_layer`.
    """

    # 9MB (+ padding below) so that it matches the 10MB expected by Gzip.
    _CHUNK_SIZE = 1024 * 1024 * 9

    # Size of a single TAR record; file contents are padded to a multiple of this.
    _BLOCK_SIZE = 512

    def __init__(self, tar_iterator, path_prefix=None):
        self.tar_iterator = tar_iterator
        self.path_prefix = path_prefix

    def get_generator(self):
        """ Yields the combined TAR stream as a sequence of byte strings: for every accepted
            entry its header, followed by its (padded) contents if it is a regular file, and
            finally the two empty records which terminate the archive.

            Raises TarLayerReadException if a non-empty layer cannot be read as a TAR.
        """
        for current_tar in self.tar_iterator():
            # Read the current TAR. If it is empty, we just continue to the next one.
            tar_file = self._open_layer(current_tar)
            if tar_file is None:
                continue

            # The try/finally ensures the layer stream is closed even if a subclass hook raises
            # or the consumer stops iterating this generator early.
            try:
                # For each of the tar entries, yield them IF and ONLY IF the subclass accepts
                # them via check_tar_info.
                for tar_info in tar_file:
                    if not self.check_tar_info(tar_info):
                        continue

                    # Yield the tar header.
                    yield self._build_header(tar_info)

                    # Try to extract any file contents for the tar. If found, we yield them
                    # as well.
                    if tar_info.isreg():
                        for block in self._file_blocks(tar_file, tar_info):
                            yield block
            finally:
                # Close the layer stream now that we're done with it.
                tar_file.close()

            # Conduct any post-tar work. Only reached when the layer was fully processed.
            self.after_tar_layer(current_tar)

        # Last two records are empty in TAR spec.
        yield b'\0' * self._BLOCK_SIZE
        yield b'\0' * self._BLOCK_SIZE

    @staticmethod
    def _open_layer(current_tar):
        """ Opens the given layer as a TAR stream. Returns None if the layer is empty and raises
            TarLayerReadException if it cannot be read.
        """
        try:
            return tarfile.open(mode='r|*', fileobj=current_tar)
        except tarfile.ReadError as exc:
            # Note: str(exc) is used rather than exc.message, as the latter does not exist on
            # Python 3 exceptions.
            if str(exc) != 'empty file':
                raise TarLayerReadException('Could not read layer')

            return None

    def _build_header(self, tar_info):
        """ Returns the serialized TAR header for the entry, with the path prefix applied (if
            one was configured).
        """
        if not self.path_prefix:
            return tar_info.tobuf()

        # Note: We use a copy here because we need to make sure we copy over all the internal
        # data of the tar header. We cannot use frombuf(tobuf()), however, because it doesn't
        # properly handle large filenames.
        clone = copy.deepcopy(tar_info)
        clone.name = os.path.join(self.path_prefix, clone.name)

        # If the entry is a *hard* link, then prefix it as well. Soft links are relative.
        if clone.linkname and clone.type == tarfile.LNKTYPE:
            clone.linkname = os.path.join(self.path_prefix, clone.linkname)

        return clone.tobuf()

    def _file_blocks(self, tar_file, tar_info):
        """ Yields the contents of the given regular file entry in chunks, followed by the
            padding (if any) needed to reach a multiple of the TAR record size.
        """
        file_stream = tar_file.extractfile(tar_info)
        if file_stream is None:
            return

        length = 0
        try:
            while True:
                current_block = file_stream.read(self._CHUNK_SIZE)
                if not current_block:
                    break

                yield current_block
                length += len(current_block)
        finally:
            file_stream.close()

        # Files must be padded to 512 byte multiples.
        remainder = length % self._BLOCK_SIZE
        if remainder != 0:
            yield b'\0' * (self._BLOCK_SIZE - remainder)

    def check_tar_info(self, tar_info):
        """ Returns true if the current tar_info should be added to the combined tar. False
            otherwise.
        """
        raise NotImplementedError()

    def after_tar_layer(self, current_tar):
        """ Invoked after a TAR layer is added, to do any post-add work. """
        raise NotImplementedError()