diff --git a/data/database.py b/data/database.py
index d540ec013..1e50b15d4 100644
--- a/data/database.py
+++ b/data/database.py
@@ -484,11 +484,12 @@ class EmailConfirmation(BaseModel):
 
 class ImageStorage(BaseModel):
   uuid = CharField(default=uuid_generator, index=True, unique=True)
-  checksum = CharField(null=True)
+  checksum = CharField(null=True)  # TODO remove when all checksums have been moved back to Image
   image_size = BigIntegerField(null=True)
   uncompressed_size = BigIntegerField(null=True)
   uploading = BooleanField(default=True, null=True)
   cas_path = BooleanField(default=True)
+  content_checksum = CharField(null=True, index=True)
 
 
 class ImageStorageTransformation(BaseModel):
@@ -570,6 +571,7 @@ class Image(BaseModel):
   command = TextField(null=True)
   aggregate_size = BigIntegerField(null=True)
   v1_json_metadata = TextField(null=True)
+  v1_checksum = CharField(null=True)
 
   class Meta:
     database = db
diff --git a/data/migrations/versions/2827d36939e4_separate_v1_and_v2_checksums.py b/data/migrations/versions/2827d36939e4_separate_v1_and_v2_checksums.py
new file mode 100644
index 000000000..4e161daed
--- /dev/null
+++ b/data/migrations/versions/2827d36939e4_separate_v1_and_v2_checksums.py
@@ -0,0 +1,30 @@
+"""Separate v1 and v2 checksums.
+
+Revision ID: 2827d36939e4
+Revises: 73669db7e12
+Create Date: 2015-11-04 16:29:48.905775
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '2827d36939e4'
+down_revision = '73669db7e12'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade(tables):
+    ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('image', sa.Column('v1_checksum', sa.String(length=255), nullable=True))
+    op.add_column('imagestorage', sa.Column('content_checksum', sa.String(length=255), nullable=True))
+    op.create_index('imagestorage_content_checksum', 'imagestorage', ['content_checksum'], unique=False)
+    ### end Alembic commands ###
+
+
+def downgrade(tables):
+    ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index('imagestorage_content_checksum', table_name='imagestorage')
+    op.drop_column('imagestorage', 'content_checksum')
+    op.drop_column('image', 'v1_checksum')
+    ### end Alembic commands ###
diff --git a/data/migrations/versions/73669db7e12_remove_legacy_github_column.py b/data/migrations/versions/73669db7e12_remove_legacy_github_column.py
index e6ee5d040..38698c5eb 100644
--- a/data/migrations/versions/73669db7e12_remove_legacy_github_column.py
+++ b/data/migrations/versions/73669db7e12_remove_legacy_github_column.py
@@ -12,10 +12,6 @@ down_revision = '35f538da62'
 from alembic import op
 import sqlalchemy as sa
-<<<<<<< HEAD
-=======
-from sqlalchemy.dialects import mysql
->>>>>>> Remove the used_legacy_github column
 
 
 def upgrade(tables):
     ### commands auto generated by Alembic - please adjust! ###
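
Reviewer note (not part of the diff): the schema change above is easiest to read with the two digests side by side. The sketch below uses only hashlib and illustrative values; it mirrors the semantics of digest/checksums.py rather than calling Quay code. The v1 checksum seeds the hash with the v1 JSON metadata plus a newline before the layer bytes, while the new content checksum covers the layer bytes alone, which is why it can live on the shared ImageStorage row.

# Sketch only: illustrative values, not Quay code.
import hashlib

layer_bytes = b'example layer contents'
v1_json_metadata = '{"id": "abcd"}'

# v1-style checksum: JSON line first, then the layer stream.
v1_hash = hashlib.sha256()
v1_hash.update((v1_json_metadata + '\n').encode('utf-8'))
v1_hash.update(layer_bytes)
v1_checksum = 'sha256:{0}'.format(v1_hash.hexdigest())  # -> Image.v1_checksum

# Content checksum: the layer bytes only, shared by any image reusing the storage row.
content_hash = hashlib.sha256()
content_hash.update(layer_bytes)
content_checksum = 'sha256:{0}'.format(content_hash.hexdigest())  # -> ImageStorage.content_checksum

assert v1_checksum != content_checksum  # same layer, two different digests
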
diff --git a/data/model/blob.py b/data/model/blob.py
index 4bad62584..5547c7646 100644
--- a/data/model/blob.py
+++ b/data/model/blob.py
@@ -17,7 +17,7 @@ def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
                     .join(Repository)
                     .join(Namespace)
                     .where(Repository.name == repo_name, Namespace.username == namespace,
-                           ImageStorage.checksum == blob_digest))
+                           ImageStorage.content_checksum == blob_digest))
 
   if not placements:
     raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))
@@ -35,11 +35,11 @@ def store_blob_record_and_temp_link(namespace, repo_name, blob_digest, location_
   repo = _basequery.get_existing_repository(namespace, repo_name)
 
   try:
-    storage = ImageStorage.get(checksum=blob_digest)
+    storage = ImageStorage.get(content_checksum=blob_digest)
     location = ImageStorageLocation.get(name=location_name)
     ImageStoragePlacement.get(storage=storage, location=location)
   except ImageStorage.DoesNotExist:
-    storage = ImageStorage.create(checksum=blob_digest)
+    storage = ImageStorage.create(content_checksum=blob_digest)
   except ImageStoragePlacement.DoesNotExist:
     ImageStoragePlacement.create(storage=storage, location=location)
 
diff --git a/data/model/image.py b/data/model/image.py
index 078875417..96b01c8e6 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -284,10 +284,7 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
   except Image.DoesNotExist:
     raise DataModelException('No image with specified id and repository')
 
-  # We cleanup any old checksum in case it's a retry after a fail
-  fetched.storage.checksum = None
   fetched.created = datetime.now()
-
   if created_date_str is not None:
     try:
       fetched.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
@@ -295,6 +292,11 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
       # parse raises different exceptions, so we cannot use a specific kind of handler here.
       pass
 
+  # We cleanup any old checksum in case it's a retry after a fail
+  fetched.v1_checksum = None
+  fetched.storage.checksum = None  # TODO remove when storage checksums are no longer read
+  fetched.storage.content_checksum = None
+
   fetched.comment = comment
   fetched.command = command
   fetched.v1_json_metadata = v1_json_metadata
diff --git a/digest/checksums.py b/digest/checksums.py
index ea30e4dc1..95a39ce96 100644
--- a/digest/checksums.py
+++ b/digest/checksums.py
@@ -75,6 +75,14 @@ def simple_checksum_handler(json_data):
   return h, fn
 
 
+def content_checksum_handler():
+  h = hashlib.sha256()
+
+  def fn(buf):
+    h.update(buf)
+  return h, fn
+
+
 def compute_simple(fp, json_data):
   data = json_data + '\n'
   return 'sha256:{0}'.format(sha256_file(fp, data))
diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py
index 3d049c757..19915363c 100644
--- a/endpoints/v1/registry.py
+++ b/endpoints/v1/registry.py
@@ -249,6 +249,10 @@ def put_image_layer(namespace, repository, image_id):
   h, sum_hndlr = checksums.simple_checksum_handler(json_data)
   sr.add_handler(sum_hndlr)
 
+  # Add a handler which computes the content checksum only
+  ch, content_sum_hndlr = checksums.content_checksum_handler()
+  sr.add_handler(content_sum_hndlr)
+
   # Stream write the data to storage.
   with database.CloseForLongOperation(app.config):
     try:
@@ -278,6 +282,7 @@ def put_image_layer(namespace, repository, image_id):
     # We don't have a checksum stored yet, that's fine skipping the check.
     # Not removing the mark though, image is not downloadable yet.
     session['checksum'] = csums
+    session['content_checksum'] = 'sha256:{0}'.format(ch.hexdigest())
     return make_response('true', 200)
 
   checksum = repo_image.storage.checksum
@@ -339,8 +344,9 @@ def put_image_checksum(namespace, repository, image_id):
     abort(409, 'Cannot set checksum for image %(image_id)s',
           issue='image-write-error', image_id=image_id)
 
-  logger.debug('Storing image checksum')
-  err = store_checksum(repo_image.storage, checksum)
+  logger.debug('Storing image and content checksums')
+  content_checksum = session.get('content_checksum', None)
+  err = store_checksum(repo_image, checksum, content_checksum)
   if err:
     abort(400, err)
 
@@ -429,14 +435,18 @@ def generate_ancestry(image_id, uuid, locations, parent_id=None, parent_uuid=Non
   store.put_content(locations, store.image_ancestry_path(uuid), json.dumps(data))
 
 
-def store_checksum(image_storage, checksum):
+def store_checksum(image_with_storage, checksum, content_checksum):
   checksum_parts = checksum.split(':')
   if len(checksum_parts) != 2:
     return 'Invalid checksum format'
 
   # We store the checksum
-  image_storage.checksum = checksum
-  image_storage.save()
+  image_with_storage.storage.checksum = checksum  # TODO remove when v1 checksums are on image only
+  image_with_storage.storage.content_checksum = content_checksum
+  image_with_storage.storage.save()
+
+  image_with_storage.v1_checksum = checksum
+  image_with_storage.save()
 
 
 @v1_bp.route('/images/<image_id>/json', methods=['PUT'])
diff --git a/initdb.py b/initdb.py
index 33b8e2b5a..7d2218778 100644
--- a/initdb.py
+++ b/initdb.py
@@ -82,7 +82,7 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map):
   new_image_locations = new_image.storage.locations
   new_image.storage.uuid = __gen_image_uuid(repo, image_num)
   new_image.storage.uploading = False
-  new_image.storage.checksum = checksum
+  new_image.storage.content_checksum = checksum
   new_image.storage.save()
 
   # Write some data for the storage.
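
Reviewer note (not part of the diff): put_image_layer registers the new handler on the same stream reader as the existing simple-checksum handler, so both digests are computed in a single pass over the uploaded layer, and put_image_checksum later persists the value it stashed in the session. Below is a minimal, self-contained sketch of that handler pattern; FakeStreamReader is a hypothetical stand-in for the registry's socket reader, not Quay code.

# Sketch only: a handler returns (hash_object, update_fn); the reader calls every
# registered update_fn on each chunk, so several checksums come out of one pass.
import hashlib


def content_checksum_handler():
  h = hashlib.sha256()

  def fn(buf):
    h.update(buf)
  return h, fn


class FakeStreamReader(object):
  """ Stand-in for the registry's streaming reader (illustration only). """
  def __init__(self, chunks):
    self._chunks = chunks
    self._handlers = []

  def add_handler(self, handler_fn):
    self._handlers.append(handler_fn)

  def drain(self):
    for chunk in self._chunks:
      for handler_fn in self._handlers:
        handler_fn(chunk)


sr = FakeStreamReader([b'layer ', b'bytes'])
ch, content_sum_hndlr = content_checksum_handler()
sr.add_handler(content_sum_hndlr)
sr.drain()

print('sha256:{0}'.format(ch.hexdigest()))  # the value stored in session['content_checksum']
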