initial import for Open Source 🎉

Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

util/registry/aufs.py
@@ -0,0 +1,31 @@
import os
AUFS_METADATA = u'.wh..wh.'
AUFS_WHITEOUT = u'.wh.'
AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT)
def is_aufs_metadata(absolute):
""" Returns whether the given absolute references an AUFS metadata file. """
filename = os.path.basename(absolute)
return filename.startswith(AUFS_METADATA) or absolute.startswith(AUFS_METADATA)
def get_deleted_filename(absolute):
""" Returns the name of the deleted file referenced by the AUFS whiteout file at
the given path or None if the file path does not reference a whiteout file.
"""
filename = os.path.basename(absolute)
if not filename.startswith(AUFS_WHITEOUT):
return None
return filename[AUFS_WHITEOUT_PREFIX_LENGTH:]
def get_deleted_prefix(absolute):
""" Returns the path prefix of the deleted file referenced by the AUFS whiteout file at
the given path or None if the file path does not reference a whiteout file.
"""
deleted_filename = get_deleted_filename(absolute)
if deleted_filename is None:
return None
dirname = os.path.dirname(absolute)
return os.path.join('/', dirname, deleted_filename)[1:]
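
A minimal illustration of how these whiteout helpers behave (not part of the committed file; the values follow from the prefix constants above):

from util.registry.aufs import is_aufs_metadata, get_deleted_filename, get_deleted_prefix

assert is_aufs_metadata('etc/.wh..wh.opq')               # AUFS bookkeeping entry
assert get_deleted_filename('etc/.wh.hosts') == 'hosts'  # whiteout marking etc/hosts as deleted
assert get_deleted_prefix('etc/.wh.hosts') == 'etc/hosts'
assert get_deleted_prefix('etc/hosts') is None           # not a whiteout file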

util/registry/dockerver.py
@@ -0,0 +1,34 @@
import re
from semantic_version import Version
_USER_AGENT_SEARCH_REGEX = re.compile(r'docker\/([0-9]+(?:\.[0-9]+){1,2})')
_EXACT_1_5_USER_AGENT = re.compile(r'^Go 1\.1 package http$')
_ONE_FIVE_ZERO = '1.5.0'
def docker_version(user_agent_string):
""" Extract the Docker version from the user agent, taking special care to
handle the case of a 1.5 client requesting an auth token, which sends
a broken user agent. If we cannot positively identify a version, return
None.
"""
# First search for a well defined semver portion in the UA header.
found_semver = _USER_AGENT_SEARCH_REGEX.search(user_agent_string)
if found_semver:
# Docker changed their versioning scheme on Feb 17, 2017 to use date-based versioning:
# https://github.com/docker/docker/pull/31075
# This scheme allows for 0s to appear as prefixes in the major or minor portions of the version,
# which violates semver. Strip them out.
portions = found_semver.group(1).split('.')
updated_portions = [(p[:-1].lstrip('0') + p[-1]) for p in portions]
return Version('.'.join(updated_portions), partial=True)
# Check if we received the very specific header which represents a 1.5 request
# to the auth endpoints.
elif _EXACT_1_5_USER_AGENT.match(user_agent_string):
return Version(_ONE_FIVE_ZERO)
else:
return None
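
Worked examples of the parsing rules above (the same cases appear in the docker_version tests later in this commit):

from util.registry.dockerver import docker_version

docker_version('docker/1.9.1 go/go1.4.2 os/linux arch/amd64')  # Version('1.9.1')
docker_version('docker/17.03.0 my_version_sucks')              # Version('17.3.0'); leading zero stripped
docker_version('Go 1.1 package http')                          # Version('1.5.0'); broken 1.5 auth client UA
docker_version('curl')                                          # None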

util/registry/filelike.py
@@ -0,0 +1,167 @@
WHENCE_ABSOLUTE = 0
WHENCE_RELATIVE = 1
WHENCE_RELATIVE_END = 2
READ_UNTIL_END = -1
class BaseStreamFilelike(object):
def __init__(self, fileobj):
self._fileobj = fileobj
self._cursor_position = 0
def close(self):
self._fileobj.close()
def read(self, size=READ_UNTIL_END):
buf = self._fileobj.read(size)
if buf is None:
return None
self._cursor_position += len(buf)
return buf
def tell(self):
return self._cursor_position
def seek(self, index, whence=WHENCE_ABSOLUTE):
num_bytes_to_ff = 0
if whence == WHENCE_ABSOLUTE:
if index < self._cursor_position:
raise IOError('Cannot seek backwards')
num_bytes_to_ff = index - self._cursor_position
elif whence == WHENCE_RELATIVE:
if index < 0:
raise IOError('Cannot seek backwards')
num_bytes_to_ff = index
elif whence == WHENCE_RELATIVE_END:
raise IOError('Stream does not have a known end point')
bytes_forward = num_bytes_to_ff
while num_bytes_to_ff > 0:
buf = self._fileobj.read(num_bytes_to_ff)
if not buf:
raise IOError('Seek past end of file')
num_bytes_to_ff -= len(buf)
self._cursor_position += bytes_forward
return bytes_forward
class SocketReader(BaseStreamFilelike):
def __init__(self, fileobj):
super(SocketReader, self).__init__(fileobj)
self.handlers = []
def add_handler(self, handler):
self.handlers.append(handler)
def read(self, size=READ_UNTIL_END):
buf = super(SocketReader, self).read(size)
for handler in self.handlers:
handler(buf)
return buf
def wrap_with_handler(in_fp, handler):
wrapper = SocketReader(in_fp)
wrapper.add_handler(handler)
return wrapper
class FilelikeStreamConcat(object):
""" A file-like object which concats all the file-like objects in the specified generator into
a single stream.
"""
def __init__(self, file_generator):
self._file_generator = file_generator
self._current_file = file_generator.next()
self._current_position = 0
self._closed = False
def tell(self):
return self._current_position
def close(self):
self._closed = True
def read(self, size=READ_UNTIL_END):
buf = ''
current_size = size
while size == READ_UNTIL_END or len(buf) < size:
current_buf = self._current_file.read(current_size)
if current_buf:
buf += current_buf
self._current_position += len(current_buf)
if size != READ_UNTIL_END:
current_size -= len(current_buf)
else:
# That file was out of data, prime a new one
self._current_file.close()
try:
self._current_file = self._file_generator.next()
except StopIteration:
return buf
return buf
class StreamSlice(BaseStreamFilelike):
""" A file-like object which returns a file-like object that represents a slice of the data in
the specified file obj. All methods will act as if the slice is its own file.
"""
def __init__(self, fileobj, start_offset=0, end_offset_exclusive=READ_UNTIL_END):
super(StreamSlice, self).__init__(fileobj)
self._end_offset_exclusive = end_offset_exclusive
self._start_offset = start_offset
if start_offset > 0:
self.seek(start_offset)
def read(self, size=READ_UNTIL_END):
if self._end_offset_exclusive == READ_UNTIL_END:
# We weren't asked to limit the end of the stream
return super(StreamSlice, self).read(size)
# Compute the max bytes to read until the end or until we reach the user requested max
max_bytes_to_read = self._end_offset_exclusive - super(StreamSlice, self).tell()
if size != READ_UNTIL_END:
max_bytes_to_read = min(max_bytes_to_read, size)
return super(StreamSlice, self).read(max_bytes_to_read)
def _file_min(self, first, second):
if first == READ_UNTIL_END:
return second
if second == READ_UNTIL_END:
return first
return min(first, second)
def tell(self):
return super(StreamSlice, self).tell() - self._start_offset
def seek(self, index, whence=WHENCE_ABSOLUTE):
index = self._file_min(self._end_offset_exclusive, index)
super(StreamSlice, self).seek(index, whence)
class LimitingStream(StreamSlice):
""" A file-like object which mimics the specified file stream being limited to the given number
of bytes. All calls after that limit (if specified) will act as if the file has no additional
data.
"""
def __init__(self, fileobj, read_limit=READ_UNTIL_END, seekable=True):
super(LimitingStream, self).__init__(fileobj, 0, read_limit)
self._seekable = seekable
def seek(self, index, whence=WHENCE_ABSOLUTE):
if not self._seekable:
raise AttributeError
super(LimitingStream, self).seek(index, whence)

util/registry/generatorfile.py
@@ -0,0 +1,88 @@
def _complain_ifclosed(closed):
if closed:
raise ValueError, "I/O operation on closed file"
class GeneratorFile(object):
""" File-like object which wraps a Python generator to produce the file contents.
Modeled on StringIO and comments on the file-like interface copied from there.
"""
def __init__(self, generator):
self._generator = generator
self._closed = False
self._buf = ''
self._position = 0
def __iter__(self):
return self
def tell(self):
"""Return the file's current position, like stdio's ftell()."""
_complain_ifclosed(self._closed)
return self._position
def next(self):
"""A file object is its own iterator, for example iter(f) returns f
(unless f is closed). When a file is used as an iterator, typically
in a for loop (for example, for line in f: print line), the next()
method is called repeatedly. This method returns the next input line,
or raises StopIteration when EOF is hit.
"""
_complain_ifclosed(self._closed)
r = self.read()
if not r:
raise StopIteration
return r
def readable(self):
return not self._closed
def readline(self):
buf = []
while True:
c = self.read(size=1)
buf.append(c)
if c == '\n' or c == '':
return ''.join(buf)
def flush(self):
_complain_ifclosed(self._closed)
def read(self, size=-1):
"""Read at most size bytes from the file
(less if the read hits EOF before obtaining size bytes).
If the size argument is negative or omitted, read all data until EOF
is reached. The bytes are returned as a string object. An empty
string is returned when EOF is encountered immediately.
"""
_complain_ifclosed(self._closed)
buf = self._buf
while size < 0 or len(buf) < size:
try:
buf = buf + self._generator.next()
except StopIteration:
break
returned = ''
if size >= 1:
self._buf = buf[size:]
returned = buf[:size]
else:
self._buf = ''
returned = buf
self._position = self._position + len(returned)
return returned
def seek(self):
raise NotImplementedError
def close(self):
self._closed = True
del self._buf
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
self._closed = True

@@ -0,0 +1,113 @@
import zlib
import string
BLOCK_SIZE = 16384
"""Read block size"""
WINDOW_BUFFER_SIZE = 16 + zlib.MAX_WBITS
"""zlib window buffer size, set to gzip's format"""
class GzipInputStream(object):
"""
Simple class that allows streaming reads from GZip files.
Python 2.x gzip.GzipFile relies on .seek() and .tell(), so it
doesn't support this (@see: http://bo4.me/YKWSsL).
Adapted from: https://gist.github.com/beaufour/4205533
"""
def __init__(self, fileobj):
"""
Initialize with the given file-like object.
@param fileobj: file-like object,
"""
self._file = fileobj
self._zip = zlib.decompressobj(WINDOW_BUFFER_SIZE)
self._offset = 0 # position in unzipped stream
self._data = ""
def __fill(self, num_bytes):
"""
Fill the internal buffer with 'num_bytes' of data.
@param num_bytes: int, number of bytes to read in (0 = everything)
"""
if not self._zip:
return
while not num_bytes or len(self._data) < num_bytes:
data = self._file.read(BLOCK_SIZE)
if not data:
self._data = self._data + self._zip.flush()
self._zip = None # no more data
break
self._data = self._data + self._zip.decompress(data)
def __iter__(self):
return self
def seek(self, offset, whence=0):
if whence == 0:
position = offset
elif whence == 1:
position = self._offset + offset
else:
raise IOError("Illegal argument")
if position < self._offset:
raise IOError("Cannot seek backwards")
# skip forward, in blocks
while position > self._offset:
if not self.read(min(position - self._offset, BLOCK_SIZE)):
break
def tell(self):
return self._offset
def read(self, size=0):
self.__fill(size)
if size:
data = self._data[:size]
self._data = self._data[size:]
else:
data = self._data
self._data = ""
self._offset = self._offset + len(data)
return data
def next(self):
line = self.readline()
if not line:
raise StopIteration()
return line
def readline(self):
# make sure we have an entire line
while self._zip and "\n" not in self._data:
self.__fill(len(self._data) + 512)
pos = string.find(self._data, "\n") + 1
if pos <= 0:
return self.read()
return self.read(pos)
def readlines(self):
lines = []
while True:
line = self.readline()
if not line:
break
lines.append(line)
return lines
def close(self):
self._file.close()
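
A minimal usage sketch (the input path is hypothetical, and the module name for this file is not shown in this view, so the import path is an assumption): stream-decompress a gzip file line by line without seeking.

from util.registry.gzipinputstream import GzipInputStream  # assumed module path

with open('manifest.json.gz', 'rb') as fp:
    stream = GzipInputStream(fp)
    lines = [line.rstrip('\n') for line in stream]  # iteration uses next()/readline() above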

@@ -0,0 +1,53 @@
"""
Defines utility methods for working with gzip streams.
"""
import zlib
import time
# Window size for decompressing GZIP streams.
# This results in ZLIB automatically detecting the GZIP headers.
# http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32
CHUNK_SIZE = 5 * 1024 * 1024
class SizeInfo(object):
def __init__(self):
self.uncompressed_size = 0
self.compressed_size = 0
self.is_valid = True
def calculate_size_handler():
""" Returns an object and a SocketReader handler. The handler will gunzip the data it receives,
adding the size found to the object.
"""
size_info = SizeInfo()
decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
def fn(buf):
if not size_info.is_valid:
return
# Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
# memory. As a result, we have to loop until the unconsumed tail is empty.
current_data = buf
size_info.compressed_size += len(current_data)
while len(current_data) > 0:
try:
size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
except:
# The gzip stream is not valid for some reason.
size_info.uncompressed_size = None
size_info.is_valid = False
return
current_data = decompressor.unconsumed_tail
# Make sure we allow the scheduler to do other work if we get stuck in this tight loop.
if len(current_data) > 0:
time.sleep(0)
return size_info, fn
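
A rough sketch of the intended wiring with SocketReader from filelike.py (the gzip module name and the input path are assumptions, as neither is shown in this view): count compressed and uncompressed bytes as data flows through the reader.

from util.registry.filelike import wrap_with_handler
from util.registry.gzipstream import calculate_size_handler  # assumed module path

size_info, size_handler = calculate_size_handler()
reader = wrap_with_handler(open('layer.tar.gz', 'rb'), size_handler)
while reader.read(4096):
    pass
# size_info.compressed_size and size_info.uncompressed_size now hold the totals;
# is_valid is False if the data was not a valid gzip stream.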

util/registry/gzipwrap.py
@@ -0,0 +1,59 @@
from gzip import GzipFile
# 256K buffer to Gzip
GZIP_BUFFER_SIZE = 1024 * 256
class GzipWrap(object):
def __init__(self, input, filename=None, compresslevel=1):
self.input = iter(input)
self.buffer = ''
self.zipper = GzipFile(filename, mode='wb', fileobj=self, compresslevel=compresslevel, mtime=0)
self.is_done = False
def read(self, size=-1):
if size is None or size < 0:
raise Exception('Call to GzipWrap with unbound size will result in poor performance')
# If the buffer already has enough bytes, then simply pop them off of
# the beginning and return them.
if len(self.buffer) >= size or self.is_done:
ret = self.buffer[0:size]
self.buffer = self.buffer[size:]
return ret
# Otherwise, zip the input until we have enough bytes.
while True:
# Attempt to retrieve the next bytes to write.
is_done = False
input_size = 0
input_buffer = ''
while input_size < GZIP_BUFFER_SIZE:
try:
s = self.input.next()
input_buffer += s
input_size = input_size + len(s)
except StopIteration:
is_done = True
break
self.zipper.write(input_buffer)
if is_done:
self.zipper.flush()
self.zipper.close()
self.is_done = True
if len(self.buffer) >= size or is_done:
ret = self.buffer[0:size]
self.buffer = self.buffer[size:]
return ret
def flush(self):
pass
def write(self, data):
self.buffer += data
def close(self):
self.input.close()
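
A minimal usage sketch (Python 2 byte strings, as in the rest of the commit): compress a generator of chunks on the fly and read the gzip output in bounded pieces.

from util.registry.gzipwrap import GzipWrap

def chunks():
    yield 'hello '
    yield 'world'

wrapped = GzipWrap(chunks(), filename='greeting.txt')
compressed = ''
while True:
    buf = wrapped.read(4096)
    if not buf:
        break
    compressed += buf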

util/registry/queuefile.py
@@ -0,0 +1,76 @@
class QueueFile(object):
""" Class which implements a file-like interface and reads QueueResult's from a blocking
multiprocessing queue.
"""
def __init__(self, queue, name=None):
self._queue = queue
self._closed = False
self._done = False
self._buffer = ''
self._total_size = 0
self._name = name
self.raised_exception = False
self._exception_handlers = []
def add_exception_handler(self, handler):
self._exception_handlers.append(handler)
def read(self, size=-1):
# If the queuefile was closed or we have finished, send back any remaining data.
if self._closed or self._done:
if size == -1:
buf = self._buffer
self._buffer = ''
return buf
buf = self._buffer[0:size]
self._buffer = self._buffer[size:]
return buf
# Loop until we reach the requested data size (or forever if all data was requested).
while (len(self._buffer) < size) or (size == -1):
result = self._queue.get(block=True)
# Check for any exceptions raised by the queue process.
if result.exception is not None:
self._closed = True
self.raised_exception = True
# Fire off the exception to any registered handlers. If no handlers were registered,
# then raise the exception locally.
handled = False
for handler in self._exception_handlers:
handler(result.exception)
handled = True
if handled:
return ''
else:
raise result.exception
# Check for no further data. If the QueueProcess has finished producing data, then break
# out of the loop to return the data already acquired.
if result.data is None:
self._done = True
break
# Add the data to the buffer.
self._buffer += result.data
self._total_size += len(result.data)
# Return the requested slice of the buffer.
if size == -1:
buf = self._buffer
self._buffer = ''
return buf
buf = self._buffer[0:size]
self._buffer = self._buffer[size:]
return buf
def flush(self):
# Purposefully not implemented.
pass
def close(self):
self._closed = True

util/registry/queueprocess.py
@@ -0,0 +1,76 @@
from multiprocessing import Process, Queue
from collections import namedtuple
import logging
import multiprocessing
import time
import sys
import traceback
logger = multiprocessing.log_to_stderr()
logger.setLevel(logging.INFO)
class QueueProcess(object):
""" Helper class which invokes a worker in a process to produce
data for one (or more) queues.
"""
def __init__(self, get_producer, chunk_size, max_size, args, finished=None):
self._get_producer = get_producer
self._queues = []
self._chunk_size = chunk_size
self._max_size = max_size
self._args = args or []
self._finished = finished
def create_queue(self):
""" Adds a multiprocessing queue to the list of queues. Any queues added
will have the data produced appended.
"""
queue = Queue(self._max_size / self._chunk_size)
self._queues.append(queue)
return queue
@staticmethod
def run_process(target, args, finished=None):
def _target(tar, arg, fin):
try:
tar(*args)
finally:
if fin:
fin()
Process(target=_target, args=(target, args, finished)).start()
def run(self):
# Important! gipc is used here because normal multiprocessing does not work
# correctly with gevent when we sleep.
args = (self._get_producer, self._queues, self._chunk_size, self._args)
QueueProcess.run_process(_run, args, finished=self._finished)
QueueResult = namedtuple('QueueResult', ['data', 'exception'])
def _run(get_producer, queues, chunk_size, args):
producer = get_producer(*args)
while True:
try:
result = QueueResult(producer(chunk_size) or None, None)
except Exception as ex:
message = '%s\n%s' % (ex.message, "".join(traceback.format_exception(*sys.exc_info())))
result = QueueResult(None, Exception(message))
for queue in queues:
try:
queue.put(result, block=True)
except Exception as ex:
logger.exception('Exception writing to queue.')
return
# Terminate the producer loop if the data produced is empty or an exception occurred.
if result.data is None or result.exception is not None:
break
# Important! This allows the thread that writes the queue data to the pipe
# to do so. Otherwise, this hangs.
time.sleep(0)
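
A rough sketch of how QueueProcess and QueueFile appear intended to fit together (the producer and path are hypothetical): produce data in a child process and read it back in the parent through a file-like interface.

from util.registry.queuefile import QueueFile
from util.registry.queueprocess import QueueProcess

def get_file_producer(path):
    fp = open(path, 'rb')
    return lambda chunk_size: fp.read(chunk_size)  # '' at EOF ends the queue

process = QueueProcess(get_file_producer, 8 * 1024, 1024 * 1024, ('/tmp/example.tar',))
queue = process.create_queue()
process.run()
contents = QueueFile(queue).read()  # blocks until the child signals completion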

@@ -0,0 +1,34 @@
import features
import json
from contextlib import contextmanager
from data import model
from app import image_replication_queue
DEFAULT_BATCH_SIZE = 1000
@contextmanager
def queue_replication_batch(namespace, batch_size=DEFAULT_BATCH_SIZE):
"""
Context manager implementation which returns a target callable that takes the storage
to queue for replication. When the context block exits the items generated by
the callable will be bulk inserted into the queue with the specified batch size.
"""
namespace_user = model.user.get_namespace_user(namespace)
with image_replication_queue.batch_insert(batch_size) as queue_put:
def queue_storage_replication_batch(storage):
if features.STORAGE_REPLICATION:
queue_put([storage.uuid], json.dumps({
'namespace_user_id': namespace_user.id,
'storage_id': storage.uuid,
}))
yield queue_storage_replication_batch
def queue_storage_replication(namespace, storage):
""" Queues replication for the given image storage under the given namespace (if enabled). """
with queue_replication_batch(namespace, 1) as batch_spawn:
batch_spawn(storage)

util/registry/streamlayerformat.py
@@ -0,0 +1,70 @@
import os
import tarfile
import marisa_trie
from util.registry.aufs import is_aufs_metadata, get_deleted_prefix
from util.registry.tarlayerformat import TarLayerFormat
class StreamLayerMerger(TarLayerFormat):
""" Class which creates a generator of the combined TAR data for a set of Docker layers. """
def __init__(self, get_tar_stream_iterator, path_prefix=None, reporter=None):
super(StreamLayerMerger, self).__init__(get_tar_stream_iterator, path_prefix, reporter=reporter)
self.path_trie = marisa_trie.Trie()
self.path_encountered = set()
self.deleted_prefix_trie = marisa_trie.Trie()
self.deleted_prefixes_encountered = set()
def after_tar_layer(self):
# Update the tries.
self.path_trie = marisa_trie.Trie(self.path_encountered)
self.deleted_prefix_trie = marisa_trie.Trie(self.deleted_prefixes_encountered)
@staticmethod
def _normalize_path(path):
return os.path.relpath(path.decode('utf-8'), './')
def _check_deleted(self, absolute):
ubsolute = unicode(absolute)
for prefix in self.deleted_prefix_trie.iter_prefixes(ubsolute):
if not os.path.relpath(ubsolute, prefix).startswith('..'):
return True
return False
def is_skipped_file(self, filename):
absolute = StreamLayerMerger._normalize_path(filename)
# Skip metadata.
if is_aufs_metadata(absolute):
return True
# Check if the file is under a deleted path.
if self._check_deleted(absolute):
return True
# Check if this file has already been encountered somewhere. If so,
# skip it.
ubsolute = unicode(absolute)
if ubsolute in self.path_trie:
return True
return False
def should_append_file(self, filename):
if self.is_skipped_file(filename):
return False
absolute = StreamLayerMerger._normalize_path(filename)
# Add any prefix of deleted paths to the prefix list.
deleted_prefix = get_deleted_prefix(absolute)
if deleted_prefix is not None:
self.deleted_prefixes_encountered.add(deleted_prefix)
return False
# Otherwise, add the path to the encountered list and return it.
self.path_encountered.add(absolute)
return True

util/registry/tarlayerformat.py
@@ -0,0 +1,188 @@
import os
import tarfile
import copy
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from six import add_metaclass
from util.abchelpers import nooper
class TarLayerReadException(Exception):
""" Exception raised when reading a layer has failed. """
pass
# 9MB (+ padding below) so that it matches the 10MB expected by Gzip.
CHUNK_SIZE = 1024 * 1024 * 9
@add_metaclass(ABCMeta)
class TarLayerFormatterReporter(object):
@abstractmethod
def report_pass(self, stream_count):
""" Reports a formatting pass. """
pass
@nooper
class NoopReporter(TarLayerFormatterReporter):
pass
@add_metaclass(ABCMeta)
class TarLayerFormat(object):
""" Class which creates a generator of the combined TAR data. """
def __init__(self, tar_stream_getter_iterator, path_prefix=None, reporter=None):
self.tar_stream_getter_iterator = tar_stream_getter_iterator
self.path_prefix = path_prefix or ''
self.reporter = reporter or NoopReporter()
def get_generator(self):
for stream_getter in self.tar_stream_getter_iterator():
current_tar_stream = stream_getter()
# Read the current TAR. If it is empty, we just continue
# to the next one.
tar_file = TarLayerFormat._tar_file_from_stream(current_tar_stream)
if not tar_file:
continue
# For each of the tar entries, yield them IF and ONLY IF we have not
# encountered the path before.
dangling_hard_links = defaultdict(list)
try:
for tar_info in tar_file:
if not self.should_append_file(tar_info.name):
continue
# Note: We use a copy here because we need to make sure we copy over all the internal
# data of the tar header. We cannot use frombuf(tobuf()), however, because it doesn't
# properly handle large filenames.
clone = copy.deepcopy(tar_info)
clone.name = os.path.join(self.path_prefix, clone.name)
# If the entry is a *hard* link, then prefix it as well. Soft links are relative.
if clone.linkname and clone.type == tarfile.LNKTYPE:
# If the entry is a dangling hard link, we skip here. Dangling hard links will be handled
# in a second pass.
if self.is_skipped_file(tar_info.linkname):
dangling_hard_links[tar_info.linkname].append(tar_info)
continue
clone.linkname = os.path.join(self.path_prefix, clone.linkname)
# Yield the tar header.
yield clone.tobuf()
# Try to extract any file contents for the tar. If found, we yield them as well.
if tar_info.isreg():
for block in TarLayerFormat._emit_file(tar_file, tar_info):
yield block
except UnicodeDecodeError as ude:
raise TarLayerReadException('Decode error: %s' % ude)
# Close the layer stream now that we're done with it.
tar_file.close()
# If there are any dangling hard links, open a new stream and retarget the dangling hard
# links to a new copy of the contents, which will be placed under the *first* dangling hard
# link's name.
if len(dangling_hard_links) > 0:
tar_file = TarLayerFormat._tar_file_from_stream(stream_getter())
if not tar_file:
raise TarLayerReadException('Could not re-read tar layer')
for tar_info in tar_file:
# If we encounter a file that holds the data for a dangling link,
# emit it under the name of the first dangling hard link. All other
# dangling hard links will be retargeted to this first name.
if tar_info.name in dangling_hard_links:
first_dangling = dangling_hard_links[tar_info.name][0]
# Copy the first dangling hard link, change it to a normal file,
# and emit the deleted file's contents for it.
clone = copy.deepcopy(first_dangling)
clone.name = os.path.join(self.path_prefix, first_dangling.name)
clone.type = tar_info.type
clone.size = tar_info.size
clone.pax_headers = tar_info.pax_headers
yield clone.tobuf()
for block in TarLayerFormat._emit_file(tar_file, tar_info):
yield block
elif (tar_info.type == tarfile.LNKTYPE and
tar_info.linkname in dangling_hard_links and
not self.is_skipped_file(tar_info.name)):
# Retarget if necessary. All dangling hard links (but the first) will
# need to be retargeted.
first_dangling = dangling_hard_links[tar_info.linkname][0]
if tar_info.name == first_dangling.name:
# Skip; the first dangling is handled above.
continue
# Retarget the hard link to the first dangling hard link.
clone = copy.deepcopy(tar_info)
clone.name = os.path.join(self.path_prefix, clone.name)
clone.linkname = os.path.join(self.path_prefix, first_dangling.name)
yield clone.tobuf()
# Close the layer stream now that we're done with it.
tar_file.close()
# Conduct any post-tar work.
self.after_tar_layer()
self.reporter.report_pass(2 if len(dangling_hard_links) > 0 else 1)
# Last two records are empty in TAR spec.
yield '\0' * 512
yield '\0' * 512
@abstractmethod
def is_skipped_file(self, filename):
""" Returns true if the file with the given name will be skipped during append.
"""
pass
@abstractmethod
def should_append_file(self, filename):
""" Returns true if the file with the given name should be appended when producing
the new TAR.
"""
pass
@abstractmethod
def after_tar_layer(self):
""" Invoked after a TAR layer is added, to do any post-add work. """
pass
@staticmethod
def _tar_file_from_stream(stream):
tar_file = None
try:
tar_file = tarfile.open(mode='r|*', fileobj=stream)
except tarfile.ReadError as re:
if str(re) != 'empty file':
raise TarLayerReadException('Could not read layer')
return tar_file
@staticmethod
def _emit_file(tar_file, tar_info):
file_stream = tar_file.extractfile(tar_info)
if file_stream is not None:
length = 0
while True:
current_block = file_stream.read(CHUNK_SIZE)
if not len(current_block):
break
yield current_block
length += len(current_block)
file_stream.close()
# Files must be padded to 512-byte multiples.
if length % 512 != 0:
yield '\0' * (512 - (length % 512))
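
For illustration only, a minimal pass-through subclass showing the abstract hooks (StreamLayerMerger earlier in this commit is the real implementation):

from util.registry.tarlayerformat import TarLayerFormat

class PassthroughTarFormat(TarLayerFormat):
    def is_skipped_file(self, filename):
        return False  # never skip an entry

    def should_append_file(self, filename):
        return True   # append every entry

    def after_tar_layer(self):
        pass          # no per-layer bookkeeping needed

# ''.join(PassthroughTarFormat(stream_getters).get_generator()) would re-emit the layers
# unchanged, where stream_getters is a callable returning an iterable of zero-argument
# functions, each producing a tar stream.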

@@ -0,0 +1,47 @@
import pytest
from util.registry.dockerver import docker_version
from semantic_version import Version, Spec
@pytest.mark.parametrize('ua_string, ver_info', [
# Old "semantic" versioning.
('docker/1.6.0 go/go1.4.2 git-commit/1234567 kernel/4.2.0-18-generic os/linux arch/amd64',
Version('1.6.0')),
('docker/1.7.1 go/go1.4.2 kernel/4.1.7-15.23.amzn1.x86_64 os/linux arch/amd64',
Version('1.7.1')),
('docker/1.6.2 go/go1.4.2 git-commit/7c8fca2-dirty kernel/4.0.5 os/linux arch/amd64',
Version('1.6.2')),
('docker/1.9.0 go/go1.4.2 git-commit/76d6bc9 kernel/3.16.0-4-amd64 os/linux arch/amd64',
Version('1.9.0')),
('docker/1.9.1 go/go1.4.2 git-commit/a34a1d5 kernel/3.10.0-229.20.1.el7.x86_64 os/linux arch/amd64',
Version('1.9.1')),
('docker/1.8.2-circleci go/go1.4.2 git-commit/a8b52f5 kernel/3.13.0-71-generic os/linux arch/amd64',
Version('1.8.2')),
('Go 1.1 package http', Version('1.5.0')),
('curl', None),
('docker/1.8 stuff', Version('1.8', partial=True)),
# Newer date-based versioning: YY.MM.revnum
('docker/17.03.0 my_version_sucks', Version('17.3.0')),
('docker/17.03.0-foobar my_version_sucks', Version('17.3.0')),
('docker/17.10.2 go/go1.4.2 git-commit/a34a1d5 kernel/3.10.0-229.20.1.el7.x86_64 os/linux arch/amd64',
Version('17.10.2')),
('docker/17.00.4 my_version_sucks', Version('17.0.4')),
('docker/17.12.00 my_version_sucks', Version('17.12.0')),
])
def test_parsing(ua_string, ver_info):
parsed_ver = docker_version(ua_string)
assert parsed_ver == ver_info, 'Expected %s, Found %s' % (ver_info, parsed_ver)
@pytest.mark.parametrize('spec, no_match_cases, match_cases', [
(Spec('<1.6.0'), ['1.6.0', '1.6.1', '1.9.0', '100.5.2'], ['0.0.0', '1.5.99']),
(Spec('<1.9.0'), ['1.9.0', '100.5.2'], ['0.0.0', '1.5.99', '1.6.0', '1.6.1']),
(Spec('<1.6.0,>0.0.1'), ['1.6.0', '1.6.1', '1.9.0', '0.0.0'], ['1.5.99']),
(Spec('>17.3.0'), ['17.3.0', '1.13.0'], ['17.4.0', '17.12.1']),
])
def test_specs(spec, no_match_cases, match_cases):
for no_match_case in no_match_cases:
assert not spec.match(Version(no_match_case))
for match_case in match_cases:
assert spec.match(Version(match_case))

@@ -0,0 +1,132 @@
from StringIO import StringIO
from util.registry.filelike import FilelikeStreamConcat, LimitingStream, StreamSlice
def somegenerator():
yield 'some'
yield 'cool'
yield 'file-contents'
def test_parts():
gens = iter([StringIO(s) for s in somegenerator()])
fileobj = FilelikeStreamConcat(gens)
assert fileobj.read(2) == 'so'
assert fileobj.read(3) == 'mec'
assert fileobj.read(7) == 'oolfile'
assert fileobj.read(-1) == '-contents'
def test_entire():
gens = iter([StringIO(s) for s in somegenerator()])
fileobj = FilelikeStreamConcat(gens)
assert fileobj.read(-1) == 'somecoolfile-contents'
def test_nolimit():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj)
assert stream.read(-1) == 'this is a cool test'
assert len('this is a cool test') == stream.tell()
def test_simplelimit():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, 4)
assert stream.read(-1) == 'this'
assert 4 == stream.tell()
def test_simplelimit_readdefined():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, 4)
assert stream.read(2) == 'th'
assert 2 == stream.tell()
def test_nolimit_readdefined():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, -1)
assert stream.read(2) == 'th'
assert 2 == stream.tell()
def test_limit_multiread():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, 7)
assert stream.read(4) == 'this'
assert stream.read(3) == ' is'
assert stream.read(2) == ''
assert 7 == stream.tell()
def test_limit_multiread2():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, 7)
assert stream.read(4) == 'this'
assert stream.read(-1) == ' is'
assert 7 == stream.tell()
def test_seek():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj)
stream.seek(2)
assert stream.read(2) == 'is'
assert 4 == stream.tell()
def test_seek_withlimit():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, 3)
stream.seek(2)
assert stream.read(2) == 'i'
assert 3 == stream.tell()
def test_seek_pastlimit():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, 3)
stream.seek(4)
assert stream.read(1) == ''
assert 3 == stream.tell()
def test_seek_to_tell():
fileobj = StringIO('this is a cool test')
stream = LimitingStream(fileobj, 3)
stream.seek(stream.tell())
assert stream.read(4) == 'thi'
assert 3 == stream.tell()
def test_none_read():
class NoneReader(object):
def read(self, size=None):
return None
stream = StreamSlice(NoneReader(), 0)
assert stream.read(-1) == None
assert stream.tell() == 0
def test_noslice():
fileobj = StringIO('this is a cool test')
stream = StreamSlice(fileobj, 0)
assert stream.read(-1) == 'this is a cool test'
assert len('this is a cool test') == stream.tell()
def test_startindex():
fileobj = StringIO('this is a cool test')
stream = StreamSlice(fileobj, 5)
assert stream.read(-1) == 'is a cool test'
assert len('is a cool test') == stream.tell()
def test_startindex_limitedread():
fileobj = StringIO('this is a cool test')
stream = StreamSlice(fileobj, 5)
assert stream.read(4) == 'is a'
assert 4 == stream.tell()
def test_slice():
fileobj = StringIO('this is a cool test')
stream = StreamSlice(fileobj, 5, 9)
assert stream.read(-1) == 'is a'
assert len('is a') == stream.tell()
def test_slice_explictread():
fileobj = StringIO('this is a cool test')
stream = StreamSlice(fileobj, 5, 9)
assert stream.read(2) == 'is'
assert stream.read(5) == ' a'
assert len('is a') == stream.tell()

@@ -0,0 +1,98 @@
from _pyio import BufferedReader
import magic
from util.registry.generatorfile import GeneratorFile
def sample_generator():
yield 'this'
yield 'is'
yield 'a'
yield 'test'
def test_basic_generator():
with GeneratorFile(sample_generator()) as f:
assert f.tell() == 0
assert f.read() == "thisisatest"
assert f.tell() == len("thisisatest")
def test_same_lengths():
with GeneratorFile(sample_generator()) as f:
assert f.read(4) == "this"
assert f.tell() == 4
assert f.read(2) == "is"
assert f.tell() == 6
assert f.read(1) == "a"
assert f.tell() == 7
assert f.read(4) == "test"
assert f.tell() == 11
def test_indexed_lengths():
with GeneratorFile(sample_generator()) as f:
assert f.read(6) == "thisis"
assert f.tell() == 6
assert f.read(5) == "atest"
assert f.tell() == 11
def test_misindexed_lengths():
with GeneratorFile(sample_generator()) as f:
assert f.read(6) == "thisis"
assert f.tell() == 6
assert f.read(3) == "ate"
assert f.tell() == 9
assert f.read(2) == "st"
assert f.tell() == 11
assert f.read(2) == ""
assert f.tell() == 11
def test_misindexed_lengths_2():
with GeneratorFile(sample_generator()) as f:
assert f.read(8) == "thisisat"
assert f.tell() == 8
assert f.read(1) == "e"
assert f.tell() == 9
assert f.read(2) == "st"
assert f.tell() == 11
assert f.read(2) == ""
assert f.tell() == 11
def test_overly_long():
with GeneratorFile(sample_generator()) as f:
assert f.read(60) == "thisisatest"
assert f.tell() == 11
def test_with_bufferedreader():
with GeneratorFile(sample_generator()) as f:
buffered = BufferedReader(f)
assert buffered.peek(10) == "thisisatest"
assert buffered.read(10) == "thisisates"
def mimed_html_generator():
yield '<html>'
yield '<body>'
yield 'sometext' * 1024
yield '</body>'
yield '</html>'
def test_magic():
mgc = magic.Magic(mime=True)
with GeneratorFile(mimed_html_generator()) as f:
buffered = BufferedReader(f)
file_header_bytes = buffered.peek(1024)
assert mgc.from_buffer(file_header_bytes) == "text/html"
with GeneratorFile(sample_generator()) as f:
buffered = BufferedReader(f)
file_header_bytes = buffered.peek(1024)
assert mgc.from_buffer(file_header_bytes) == "text/plain"

@@ -0,0 +1,112 @@
import os
import pytest
from util.registry.queueprocess import QueueResult
from util.registry.queuefile import QueueFile
class FakeQueue(object):
def __init__(self):
self.items = []
def get(self, block):
return self.items.pop(0)
def put(self, data):
self.items.append(data)
def test_basic():
queue = FakeQueue()
queue.put(QueueResult('hello world', None))
queue.put(QueueResult('! how goes there?', None))
queue.put(QueueResult(None, None))
queuefile = QueueFile(queue)
assert queuefile.read() == 'hello world! how goes there?'
def test_chunk_reading():
queue = FakeQueue()
queue.put(QueueResult('hello world', None))
queue.put(QueueResult('! how goes there?', None))
queue.put(QueueResult(None, None))
queuefile = QueueFile(queue)
data = ''
while True:
result = queuefile.read(size=2)
if not result:
break
data += result
assert data == 'hello world! how goes there?'
def test_unhandled_exception():
queue = FakeQueue()
queue.put(QueueResult('hello world', None))
queue.put(QueueResult(None, IOError('some exception')))
queue.put(QueueResult('! how goes there?', None))
queue.put(QueueResult(None, None))
queuefile = QueueFile(queue)
with pytest.raises(IOError):
queuefile.read(size=12)
def test_handled_exception():
queue = FakeQueue()
queue.put(QueueResult('hello world', None))
queue.put(QueueResult(None, IOError('some exception')))
queue.put(QueueResult('! how goes there?', None))
queue.put(QueueResult(None, None))
ex_found = [None]
def handler(ex):
ex_found[0] = ex
queuefile = QueueFile(queue)
queuefile.add_exception_handler(handler)
queuefile.read(size=12)
assert ex_found[0] is not None
def test_binary_data():
queue = FakeQueue()
# Generate some binary data.
binary_data = os.urandom(1024)
queue.put(QueueResult(binary_data, None))
queue.put(QueueResult(None, None))
queuefile = QueueFile(queue)
found_data = ''
while True:
current_data = queuefile.read(size=37)
if len(current_data) == 0:
break
found_data = found_data + current_data
assert found_data == binary_data
def test_empty_data():
queue = FakeQueue()
# Generate some empty binary data.
binary_data = '\0' * 1024
queue.put(QueueResult(binary_data, None))
queue.put(QueueResult(None, None))
queuefile = QueueFile(queue)
found_data = ''
while True:
current_data = queuefile.read(size=37)
if len(current_data) == 0:
break
found_data = found_data + current_data
assert found_data == binary_data

@@ -0,0 +1,492 @@
import tarfile
import pytest
from StringIO import StringIO
from util.registry.streamlayerformat import StreamLayerMerger
from util.registry.aufs import AUFS_WHITEOUT
from util.registry.tarlayerformat import TarLayerReadException
def create_layer(*file_pairs):
output = StringIO()
with tarfile.open(fileobj=output, mode='w:gz') as tar:
for current_filename, current_contents in file_pairs:
if current_contents is None:
# This is a deleted file.
if current_filename.endswith('/'):
current_filename = current_filename[:-1]
parts = current_filename.split('/')
if len(parts) > 1:
current_filename = '/'.join(parts[:-1]) + '/' + AUFS_WHITEOUT + parts[-1]
else:
current_filename = AUFS_WHITEOUT + parts[-1]
current_contents = ''
if current_contents.startswith('linkto:'):
info = tarfile.TarInfo(name=current_filename)
info.linkname = current_contents[len('linkto:'):]
info.type = tarfile.LNKTYPE
tar.addfile(info)
else:
info = tarfile.TarInfo(name=current_filename)
info.size = len(current_contents)
tar.addfile(info, fileobj=StringIO(current_contents))
return output.getvalue()
def create_empty_layer():
return ''
def squash_layers(layers, path_prefix=None):
def getter_for_layer(layer):
return lambda: StringIO(layer)
def layer_stream_getter():
return [getter_for_layer(layer) for layer in layers]
merger = StreamLayerMerger(layer_stream_getter, path_prefix=path_prefix)
merged_data = ''.join(merger.get_generator())
return merged_data
def assertHasFile(squashed, filename, contents):
with tarfile.open(fileobj=StringIO(squashed), mode='r:*') as tar:
member = tar.getmember(filename)
assert contents == '\n'.join(tar.extractfile(member).readlines())
def assertDoesNotHaveFile(squashed, filename):
with tarfile.open(fileobj=StringIO(squashed), mode='r:*') as tar:
try:
member = tar.getmember(filename)
except Exception as ex:
return
assert False, 'Filename %s found' % filename
def test_single_layer():
tar_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
squashed = squash_layers([tar_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'another_file', 'bar')
assertHasFile(squashed, 'third_file', 'meh')
def test_multiple_layers():
second_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
first_layer = create_layer(
('top_file', 'top'))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'another_file', 'bar')
assertHasFile(squashed, 'third_file', 'meh')
assertHasFile(squashed, 'top_file', 'top')
def test_multiple_layers_dot():
second_layer = create_layer(
('./some_file', 'foo'),
('another_file', 'bar'),
('./third_file', 'meh'))
first_layer = create_layer(
('top_file', 'top'))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, './some_file', 'foo')
assertHasFile(squashed, 'another_file', 'bar')
assertHasFile(squashed, './third_file', 'meh')
assertHasFile(squashed, 'top_file', 'top')
def test_multiple_layers_overwrite():
second_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
first_layer = create_layer(
('another_file', 'top'))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'third_file', 'meh')
assertHasFile(squashed, 'another_file', 'top')
def test_multiple_layers_overwrite_base_dot():
second_layer = create_layer(
('some_file', 'foo'),
('./another_file', 'bar'),
('third_file', 'meh'))
first_layer = create_layer(
('another_file', 'top'))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'third_file', 'meh')
assertHasFile(squashed, 'another_file', 'top')
assertDoesNotHaveFile(squashed, './another_file')
def test_multiple_layers_overwrite_top_dot():
second_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
first_layer = create_layer(
('./another_file', 'top'))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'third_file', 'meh')
assertHasFile(squashed, './another_file', 'top')
assertDoesNotHaveFile(squashed, 'another_file')
def test_deleted_file():
second_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
first_layer = create_layer(
('another_file', None))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'third_file', 'meh')
assertDoesNotHaveFile(squashed, 'another_file')
def test_deleted_readded_file():
third_layer = create_layer(
('another_file', 'bar'))
second_layer = create_layer(
('some_file', 'foo'),
('another_file', None),
('third_file', 'meh'))
first_layer = create_layer(
('another_file', 'newagain'))
squashed = squash_layers([first_layer, second_layer, third_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'third_file', 'meh')
assertHasFile(squashed, 'another_file', 'newagain')
def test_deleted_in_lower_layer():
third_layer = create_layer(
('deleted_file', 'bar'))
second_layer = create_layer(
('some_file', 'foo'),
('deleted_file', None),
('third_file', 'meh'))
first_layer = create_layer(
('top_file', 'top'))
squashed = squash_layers([first_layer, second_layer, third_layer])
assertHasFile(squashed, 'some_file', 'foo')
assertHasFile(squashed, 'third_file', 'meh')
assertHasFile(squashed, 'top_file', 'top')
assertDoesNotHaveFile(squashed, 'deleted_file')
def test_deleted_in_lower_layer_with_added_dot():
third_layer = create_layer(
('./deleted_file', 'something'))
second_layer = create_layer(
('deleted_file', None))
squashed = squash_layers([second_layer, third_layer])
assertDoesNotHaveFile(squashed, 'deleted_file')
def test_deleted_in_lower_layer_with_deleted_dot():
third_layer = create_layer(
('./deleted_file', 'something'))
second_layer = create_layer(
('./deleted_file', None))
squashed = squash_layers([second_layer, third_layer])
assertDoesNotHaveFile(squashed, 'deleted_file')
def test_directory():
second_layer = create_layer(
('foo/some_file', 'foo'),
('foo/another_file', 'bar'))
first_layer = create_layer(
('foo/some_file', 'top'))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, 'foo/some_file', 'top')
assertHasFile(squashed, 'foo/another_file', 'bar')
def test_sub_directory():
second_layer = create_layer(
('foo/some_file', 'foo'),
('foo/bar/another_file', 'bar'))
first_layer = create_layer(
('foo/some_file', 'top'))
squashed = squash_layers([first_layer, second_layer])
assertHasFile(squashed, 'foo/some_file', 'top')
assertHasFile(squashed, 'foo/bar/another_file', 'bar')
def test_delete_directory():
second_layer = create_layer(
('foo/some_file', 'foo'),
('foo/another_file', 'bar'))
first_layer = create_layer(
('foo/', None))
squashed = squash_layers([first_layer, second_layer])
assertDoesNotHaveFile(squashed, 'foo/some_file')
assertDoesNotHaveFile(squashed, 'foo/another_file')
def test_delete_sub_directory():
second_layer = create_layer(
('foo/some_file', 'foo'),
('foo/bar/another_file', 'bar'))
first_layer = create_layer(
('foo/bar/', None))
squashed = squash_layers([first_layer, second_layer])
assertDoesNotHaveFile(squashed, 'foo/bar/another_file')
assertHasFile(squashed, 'foo/some_file', 'foo')
def test_delete_sub_directory_with_dot():
second_layer = create_layer(
('foo/some_file', 'foo'),
('foo/bar/another_file', 'bar'))
first_layer = create_layer(
('./foo/bar/', None))
squashed = squash_layers([first_layer, second_layer])
assertDoesNotHaveFile(squashed, 'foo/bar/another_file')
assertHasFile(squashed, 'foo/some_file', 'foo')
def test_delete_sub_directory_with_subdot():
second_layer = create_layer(
('./foo/some_file', 'foo'),
('./foo/bar/another_file', 'bar'))
first_layer = create_layer(
('foo/bar/', None))
squashed = squash_layers([first_layer, second_layer])
assertDoesNotHaveFile(squashed, 'foo/bar/another_file')
assertDoesNotHaveFile(squashed, './foo/bar/another_file')
assertHasFile(squashed, './foo/some_file', 'foo')
def test_delete_directory_recreate():
third_layer = create_layer(
('foo/some_file', 'foo'),
('foo/another_file', 'bar'))
second_layer = create_layer(
('foo/', None))
first_layer = create_layer(
('foo/some_file', 'baz'))
squashed = squash_layers([first_layer, second_layer, third_layer])
assertHasFile(squashed, 'foo/some_file', 'baz')
assertDoesNotHaveFile(squashed, 'foo/another_file')
def test_delete_directory_prefix():
third_layer = create_layer(
('foobar/some_file', 'foo'),
('foo/another_file', 'bar'))
second_layer = create_layer(
('foo/', None))
squashed = squash_layers([second_layer, third_layer])
assertHasFile(squashed, 'foobar/some_file', 'foo')
assertDoesNotHaveFile(squashed, 'foo/another_file')
def test_delete_directory_pre_prefix():
third_layer = create_layer(
('foobar/baz/some_file', 'foo'),
('foo/another_file', 'bar'))
second_layer = create_layer(
('foo/', None))
squashed = squash_layers([second_layer, third_layer])
assertHasFile(squashed, 'foobar/baz/some_file', 'foo')
assertDoesNotHaveFile(squashed, 'foo/another_file')
def test_delete_root_directory():
third_layer = create_layer(
('build/first_file', 'foo'),
('build/second_file', 'bar'))
second_layer = create_layer(
('build', None))
squashed = squash_layers([second_layer, third_layer])
assertDoesNotHaveFile(squashed, 'build/first_file')
assertDoesNotHaveFile(squashed, 'build/second_file')
def test_tar_empty_layer():
third_layer = create_layer(
('build/first_file', 'foo'),
('build/second_file', 'bar'))
empty_layer = create_layer()
squashed = squash_layers([empty_layer, third_layer])
assertHasFile(squashed, 'build/first_file', 'foo')
assertHasFile(squashed, 'build/second_file', 'bar')
def test_data_empty_layer():
third_layer = create_layer(
('build/first_file', 'foo'),
('build/second_file', 'bar'))
empty_layer = create_empty_layer()
squashed = squash_layers([empty_layer, third_layer])
assertHasFile(squashed, 'build/first_file', 'foo')
assertHasFile(squashed, 'build/second_file', 'bar')
def test_broken_layer():
third_layer = create_layer(
('build/first_file', 'foo'),
('build/second_file', 'bar'))
broken_layer = 'not valid data'
with pytest.raises(TarLayerReadException):
squash_layers([broken_layer, third_layer])
def test_single_layer_with_prefix():
tar_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
squashed = squash_layers([tar_layer], path_prefix='foo/')
assertHasFile(squashed, 'foo/some_file', 'foo')
assertHasFile(squashed, 'foo/another_file', 'bar')
assertHasFile(squashed, 'foo/third_file', 'meh')
def test_multiple_layers_overwrite_with_prefix():
second_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
first_layer = create_layer(
('another_file', 'top'))
squashed = squash_layers([first_layer, second_layer], path_prefix='foo/')
assertHasFile(squashed, 'foo/some_file', 'foo')
assertHasFile(squashed, 'foo/third_file', 'meh')
assertHasFile(squashed, 'foo/another_file', 'top')
def test_superlong_filename():
tar_layer = create_layer(
('this_is_the_filename_that_never_ends_it_goes_on_and_on_my_friend_some_people_started', 'meh'))
squashed = squash_layers([tar_layer], path_prefix='foo/')
assertHasFile(squashed, 'foo/this_is_the_filename_that_never_ends_it_goes_on_and_on_my_friend_some_people_started', 'meh')
def test_superlong_prefix():
tar_layer = create_layer(
('some_file', 'foo'),
('another_file', 'bar'),
('third_file', 'meh'))
squashed = squash_layers([tar_layer],
path_prefix='foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/')
assertHasFile(squashed, 'foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/some_file', 'foo')
assertHasFile(squashed, 'foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/another_file', 'bar')
assertHasFile(squashed, 'foo/bar/baz/something/foo/bar/baz/anotherthing/whatever/this/is/a/really/long/filename/that/goes/here/third_file', 'meh')
def test_hardlink_to_deleted_file():
first_layer = create_layer(
('tobedeletedfile', 'somecontents'),
('link_to_deleted_file', 'linkto:tobedeletedfile'),
('third_file', 'meh'))
second_layer = create_layer(
('tobedeletedfile', None))
squashed = squash_layers([second_layer, first_layer], path_prefix='foo/')
assertHasFile(squashed, 'foo/third_file', 'meh')
assertHasFile(squashed, 'foo/link_to_deleted_file', 'somecontents')
assertDoesNotHaveFile(squashed, 'foo/tobedeletedfile')
def test_multiple_hardlink_to_deleted_file():
first_layer = create_layer(
('tobedeletedfile', 'somecontents'),
('link_to_deleted_file', 'linkto:tobedeletedfile'),
('another_link_to_deleted_file', 'linkto:tobedeletedfile'),
('third_file', 'meh'))
second_layer = create_layer(
('tobedeletedfile', None))
squashed = squash_layers([second_layer, first_layer], path_prefix='foo/')
assertHasFile(squashed, 'foo/third_file', 'meh')
assertHasFile(squashed, 'foo/link_to_deleted_file', 'somecontents')
assertHasFile(squashed, 'foo/another_link_to_deleted_file', 'somecontents')
assertDoesNotHaveFile(squashed, 'foo/tobedeletedfile')

util/registry/torrent.py
@@ -0,0 +1,137 @@
import hashlib
import time
from binascii import hexlify
import bencode
import jwt
import resumablehashlib
class TorrentConfiguration(object):
def __init__(self, instance_keys, announce_url, filename_pepper, registry_title):
self.instance_keys = instance_keys
self.announce_url = announce_url
self.filename_pepper = filename_pepper
self.registry_title = registry_title
@classmethod
def for_testing(cls, instance_keys, announce_url, registry_title):
return TorrentConfiguration(instance_keys, announce_url, 'somepepper', registry_title)
@classmethod
def from_app_config(cls, instance_keys, config):
return TorrentConfiguration(instance_keys, config['BITTORRENT_ANNOUNCE_URL'],
config['BITTORRENT_FILENAME_PEPPER'], config['REGISTRY_TITLE'])
def _jwt_from_infodict(torrent_config, infodict):
""" Returns an encoded JWT for the given BitTorrent info dict, signed by the local instance's
private key.
"""
digest = hashlib.sha1()
digest.update(bencode.bencode(infodict))
return jwt_from_infohash(torrent_config, digest.digest())
def jwt_from_infohash(torrent_config, infohash_digest):
""" Returns an encoded JWT for the given BitTorrent infohash, signed by the local instance's
private key.
"""
token_data = {
'iss': torrent_config.instance_keys.service_name,
'aud': torrent_config.announce_url,
'infohash': hexlify(infohash_digest),
}
return jwt.encode(token_data, torrent_config.instance_keys.local_private_key, algorithm='RS256',
headers={'kid': torrent_config.instance_keys.local_key_id})
def make_torrent(torrent_config, name, webseed, length, piece_length, pieces):
info_dict = {
'name': name,
'length': length,
'piece length': piece_length,
'pieces': pieces,
'private': 1,
}
info_jwt = _jwt_from_infodict(torrent_config, info_dict)
return bencode.bencode({
'announce': torrent_config.announce_url + "?jwt=" + info_jwt,
'url-list': str(webseed),
'encoding': 'UTF-8',
'created by': torrent_config.registry_title,
'creation date': int(time.time()),
'info': info_dict,
})
def public_torrent_filename(blob_uuid):
""" Returns the filename for the given blob UUID in a public image. """
return hashlib.sha256(blob_uuid).hexdigest()
def per_user_torrent_filename(torrent_config, user_uuid, blob_uuid):
""" Returns the filename for the given blob UUID for a private image. """
joined = torrent_config.filename_pepper + "||" + blob_uuid + "||" + user_uuid
return hashlib.sha256(joined).hexdigest()
class PieceHasher(object):
""" Utility for computing torrent piece hashes as the data flows through the update
method of this class. Users should get the final value by calling final_piece_hashes
since new chunks are allocated lazily.
"""
def __init__(self, piece_size, starting_offset=0, starting_piece_hash_bytes='',
hash_fragment_to_resume=None):
if not isinstance(starting_offset, (int, long)):
raise TypeError('starting_offset must be an integer')
elif not isinstance(piece_size, (int, long)):
raise TypeError('piece_size must be an integer')
self._current_offset = starting_offset
self._piece_size = piece_size
self._piece_hashes = bytearray(starting_piece_hash_bytes)
if hash_fragment_to_resume is None:
self._hash_fragment = resumablehashlib.sha1()
else:
self._hash_fragment = hash_fragment_to_resume
def update(self, buf):
buf_offset = 0
while buf_offset < len(buf):
buf_bytes_to_hash = buf[0:self._piece_length_remaining()]
to_hash_len = len(buf_bytes_to_hash)
if self._piece_offset() == 0 and to_hash_len > 0 and self._current_offset > 0:
# We are opening a new piece
self._piece_hashes.extend(self._hash_fragment.digest())
self._hash_fragment = resumablehashlib.sha1()
self._hash_fragment.update(buf_bytes_to_hash)
self._current_offset += to_hash_len
buf_offset += to_hash_len
@property
def hashed_bytes(self):
return self._current_offset
def _piece_length_remaining(self):
return self._piece_size - (self._current_offset % self._piece_size)
def _piece_offset(self):
return self._current_offset % self._piece_size
@property
def piece_hashes(self):
return self._piece_hashes
@property
def hash_fragment(self):
return self._hash_fragment
def final_piece_hashes(self):
return self._piece_hashes + self._hash_fragment.digest()
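
A minimal sketch of PieceHasher (the piece size and chunks are arbitrary; resumablehashlib is the dependency imported at the top of this file):

from util.registry.torrent import PieceHasher

hasher = PieceHasher(piece_size=512 * 1024)
for chunk in ['some', 'layer', 'data']:
    hasher.update(chunk)
piece_hashes = hasher.final_piece_hashes()  # bytearray of concatenated SHA-1 piece digests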