Move v1 checksums to image and track v2 separately

This commit is contained in:
Jake Moshenko 2015-11-04 16:18:53 -05:00 committed by Jimmy Zelinskie
parent 2b3633b107
commit 3d0bcbaaeb
8 changed files with 65 additions and 17 deletions

View file

@@ -484,11 +484,12 @@ class EmailConfirmation(BaseModel):
class ImageStorage(BaseModel): class ImageStorage(BaseModel):
uuid = CharField(default=uuid_generator, index=True, unique=True) uuid = CharField(default=uuid_generator, index=True, unique=True)
checksum = CharField(null=True) checksum = CharField(null=True) # TODO remove when all checksums have been moved back to Image
image_size = BigIntegerField(null=True) image_size = BigIntegerField(null=True)
uncompressed_size = BigIntegerField(null=True) uncompressed_size = BigIntegerField(null=True)
uploading = BooleanField(default=True, null=True) uploading = BooleanField(default=True, null=True)
cas_path = BooleanField(default=True) cas_path = BooleanField(default=True)
content_checksum = CharField(null=True, index=True)
class ImageStorageTransformation(BaseModel): class ImageStorageTransformation(BaseModel):
@@ -570,6 +571,7 @@ class Image(BaseModel):
command = TextField(null=True) command = TextField(null=True)
aggregate_size = BigIntegerField(null=True) aggregate_size = BigIntegerField(null=True)
v1_json_metadata = TextField(null=True) v1_json_metadata = TextField(null=True)
v1_checksum = CharField(null=True)
class Meta: class Meta:
database = db database = db

View file

@@ -0,0 +1,30 @@
"""Separate v1 and v2 checksums.
Revision ID: 2827d36939e4
Revises: 73669db7e12
Create Date: 2015-11-04 16:29:48.905775
"""
# revision identifiers, used by Alembic.
revision = '2827d36939e4'
down_revision = '73669db7e12'
from alembic import op
import sqlalchemy as sa
def upgrade(tables):
### commands auto generated by Alembic - please adjust! ###
op.add_column('image', sa.Column('v1_checksum', sa.String(length=255), nullable=True))
op.add_column('imagestorage', sa.Column('content_checksum', sa.String(length=255), nullable=True))
op.create_index('imagestorage_content_checksum', 'imagestorage', ['content_checksum'], unique=False)
### end Alembic commands ###
def downgrade(tables):
### commands auto generated by Alembic - please adjust! ###
op.drop_index('imagestorage_content_checksum', table_name='imagestorage')
op.drop_column('imagestorage', 'content_checksum')
op.drop_column('image', 'v1_checksum')
### end Alembic commands ###

View file

@@ -12,10 +12,6 @@ down_revision = '35f538da62'
from alembic import op from alembic import op
import sqlalchemy as sa import sqlalchemy as sa
<<<<<<< HEAD
=======
from sqlalchemy.dialects import mysql
>>>>>>> Remove the used_legacy_github column
def upgrade(tables): def upgrade(tables):
### commands auto generated by Alembic - please adjust! ### ### commands auto generated by Alembic - please adjust! ###

View file

@@ -17,7 +17,7 @@ def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
.join(Repository) .join(Repository)
.join(Namespace) .join(Namespace)
.where(Repository.name == repo_name, Namespace.username == namespace, .where(Repository.name == repo_name, Namespace.username == namespace,
ImageStorage.checksum == blob_digest)) ImageStorage.content_checksum == blob_digest))
if not placements: if not placements:
raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))
@@ -35,11 +35,11 @@ def store_blob_record_and_temp_link(namespace, repo_name, blob_digest, location_
repo = _basequery.get_existing_repository(namespace, repo_name) repo = _basequery.get_existing_repository(namespace, repo_name)
try: try:
storage = ImageStorage.get(checksum=blob_digest) storage = ImageStorage.get(content_checksum=blob_digest)
location = ImageStorageLocation.get(name=location_name) location = ImageStorageLocation.get(name=location_name)
ImageStoragePlacement.get(storage=storage, location=location) ImageStoragePlacement.get(storage=storage, location=location)
except ImageStorage.DoesNotExist: except ImageStorage.DoesNotExist:
storage = ImageStorage.create(checksum=blob_digest) storage = ImageStorage.create(content_checksum=blob_digest)
except ImageStoragePlacement.DoesNotExist: except ImageStoragePlacement.DoesNotExist:
ImageStoragePlacement.create(storage=storage, location=location) ImageStoragePlacement.create(storage=storage, location=location)

View file

@@ -284,10 +284,7 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
except Image.DoesNotExist: except Image.DoesNotExist:
raise DataModelException('No image with specified id and repository') raise DataModelException('No image with specified id and repository')
# We cleanup any old checksum in case it's a retry after a fail
fetched.storage.checksum = None
fetched.created = datetime.now() fetched.created = datetime.now()
if created_date_str is not None: if created_date_str is not None:
try: try:
fetched.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None) fetched.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
@@ -295,6 +292,11 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
# parse raises different exceptions, so we cannot use a specific kind of handler here. # parse raises different exceptions, so we cannot use a specific kind of handler here.
pass pass
# We cleanup any old checksum in case it's a retry after a fail
fetched.v1_checksum = None
fetched.storage.checksum = None # TODO remove when storage checksums are no longer read
fetched.storage.content_checksum = None
fetched.comment = comment fetched.comment = comment
fetched.command = command fetched.command = command
fetched.v1_json_metadata = v1_json_metadata fetched.v1_json_metadata = v1_json_metadata

View file

@@ -75,6 +75,14 @@ def simple_checksum_handler(json_data):
return h, fn return h, fn
def content_checksum_handler():
h = hashlib.sha256()
def fn(buf):
h.update(buf)
return h, fn
def compute_simple(fp, json_data): def compute_simple(fp, json_data):
data = json_data + '\n' data = json_data + '\n'
return 'sha256:{0}'.format(sha256_file(fp, data)) return 'sha256:{0}'.format(sha256_file(fp, data))

View file

@@ -249,6 +249,10 @@ def put_image_layer(namespace, repository, image_id):
h, sum_hndlr = checksums.simple_checksum_handler(json_data) h, sum_hndlr = checksums.simple_checksum_handler(json_data)
sr.add_handler(sum_hndlr) sr.add_handler(sum_hndlr)
# Add a handler which computes the content checksum only
ch, content_sum_hndlr = checksums.content_checksum_handler()
sr.add_handler(content_sum_hndlr)
# Stream write the data to storage. # Stream write the data to storage.
with database.CloseForLongOperation(app.config): with database.CloseForLongOperation(app.config):
try: try:
@@ -278,6 +282,7 @@ def put_image_layer(namespace, repository, image_id):
# We don't have a checksum stored yet, that's fine skipping the check. # We don't have a checksum stored yet, that's fine skipping the check.
# Not removing the mark though, image is not downloadable yet. # Not removing the mark though, image is not downloadable yet.
session['checksum'] = csums session['checksum'] = csums
session['content_checksum'] = 'sha256:{0}'.format(ch.hexdigest())
return make_response('true', 200) return make_response('true', 200)
checksum = repo_image.storage.checksum checksum = repo_image.storage.checksum
@@ -339,8 +344,9 @@ def put_image_checksum(namespace, repository, image_id):
abort(409, 'Cannot set checksum for image %(image_id)s', abort(409, 'Cannot set checksum for image %(image_id)s',
issue='image-write-error', image_id=image_id) issue='image-write-error', image_id=image_id)
logger.debug('Storing image checksum') logger.debug('Storing image and content checksums')
err = store_checksum(repo_image.storage, checksum) content_checksum = session.get('content_checksum', None)
err = store_checksum(repo_image, checksum, content_checksum)
if err: if err:
abort(400, err) abort(400, err)
@@ -429,14 +435,18 @@ def generate_ancestry(image_id, uuid, locations, parent_id=None, parent_uuid=Non
store.put_content(locations, store.image_ancestry_path(uuid), json.dumps(data)) store.put_content(locations, store.image_ancestry_path(uuid), json.dumps(data))
def store_checksum(image_storage, checksum): def store_checksum(image_with_storage, checksum, content_checksum):
checksum_parts = checksum.split(':') checksum_parts = checksum.split(':')
if len(checksum_parts) != 2: if len(checksum_parts) != 2:
return 'Invalid checksum format' return 'Invalid checksum format'
# We store the checksum # We store the checksum
image_storage.checksum = checksum image_with_storage.storage.checksum = checksum # TODO remove when v1 checksums are on image only
image_storage.save() image_with_storage.storage.content_checksum = content_checksum
image_with_storage.storage.save()
image_with_storage.v1_checksum = checksum
image_with_storage.save()
@v1_bp.route('/images/<image_id>/json', methods=['PUT']) @v1_bp.route('/images/<image_id>/json', methods=['PUT'])

View file

@@ -82,7 +82,7 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map):
new_image_locations = new_image.storage.locations new_image_locations = new_image.storage.locations
new_image.storage.uuid = __gen_image_uuid(repo, image_num) new_image.storage.uuid = __gen_image_uuid(repo, image_num)
new_image.storage.uploading = False new_image.storage.uploading = False
new_image.storage.checksum = checksum new_image.storage.content_checksum = checksum
new_image.storage.save() new_image.storage.save()
# Write some data for the storage. # Write some data for the storage.