Hash and track layer file chunks for torrenting

This commit is contained in:
Jake Moshenko 2015-12-30 17:19:19 -05:00 committed by Jimmy Zelinskie
parent 40c741f34e
commit fe87d3c796
8 changed files with 115 additions and 10 deletions

View file

@@ -14,6 +14,7 @@ from endpoints.decorators import anon_protect
from util.cache import cache_control
from util.registry.filelike import wrap_with_handler, StreamSlice
from util.registry.gzipstream import calculate_size_handler
from util.registry.torrent import PieceHasher
from storage.basestorage import InvalidChunkException
@@ -221,6 +222,13 @@ def _upload_chunk(namespace, repo_name, upload_uuid):
input_fp = wrap_with_handler(input_fp, found.sha_state.update)
piece_hasher = None
# TODO remove this when all in-progress blob uploads reliably contain piece hashes
if start_offset == 0 and found.piece_sha_state is not None:
piece_hasher = PieceHasher(app.config['TORRENT_PIECE_SIZE'], start_offset, found.piece_hashes,
found.piece_sha_state)
input_fp = wrap_with_handler(input_fp, piece_hasher.update)
# If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip the
# stream so we can determine the uncompressed size. We'll throw out this data if another chunk
# comes in, but in the common case Docker only sends one chunk.
@@ -231,7 +239,8 @@ def _upload_chunk(namespace, repo_name, upload_uuid):
try:
length_written, new_metadata, error = storage.stream_upload_chunk(location_set, upload_uuid,
start_offset, length, input_fp,
start_offset, length,
input_fp,
found.storage_metadata,
content_type=BLOB_CONTENT_TYPE)
except InvalidChunkException:
@@ -246,6 +255,10 @@ def _upload_chunk(namespace, repo_name, upload_uuid):
# know the uncompressed size.
found.uncompressed_byte_count = None
if piece_hasher is not None:
found.piece_hashes = piece_hasher.piece_hashes
found.piece_sha_state = piece_hasher.hash_fragment
found.storage_metadata = new_metadata
found.byte_count += length_written
found.chunk_count += 1
@@ -274,10 +287,15 @@ def _finish_upload(namespace, repo_name, upload_obj, expected_digest):
final_blob_location, upload_obj.storage_metadata)
# Mark the blob as uploaded.
model.blob.store_blob_record_and_temp_link(namespace, repo_name, expected_digest,
upload_obj.location, upload_obj.byte_count,
app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'],
upload_obj.uncompressed_byte_count)
blob_storage = model.blob.store_blob_record_and_temp_link(namespace, repo_name, expected_digest,
upload_obj.location,
upload_obj.byte_count,
app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'],
upload_obj.uncompressed_byte_count)
if upload_obj.piece_sha_state is not None:
piece_string = upload_obj.piece_hashes + upload_obj.piece_sha_state.hexdigest()
model.storage.save_torrent_info(blob_storage, app.config['TORRENT_PIECE_SIZE'], piece_string)
# Delete the upload tracking row.
upload_obj.delete_instance()