From 278bc736e30dc893917f6368e1fceb80c1d71a81 Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Thu, 22 Oct 2015 16:02:07 -0400
Subject: [PATCH] Revert "Merge pull request #682 from jzelinskie/revertrevert"

This reverts commit 627ad25c9c409cb39fdc16a8aada55318f173827, reversing
changes made to 31c392feccfe995f70f9fabbb2a01cbfc95b46ef.
---
 buildman/jobutil/buildjob.py             |  5 +-
 data/model/image.py                      | 11 +--
 digest/checksums.py                      |  2 +-
 endpoints/api/image.py                   | 12 ++--
 endpoints/api/repository.py              |  2 +-
 endpoints/v1/registry.py                 | 32 ++++++---
 endpoints/verbs.py                       |  9 ++-
 util/migrate/backfill_aggregate_sizes.py | 50 ++++++--------
 util/migrate/backfill_image_fields.py    | 87 ------------------------
 util/migrate/backfill_v1_metadata.py     | 67 ------------------
 10 files changed, 65 insertions(+), 212 deletions(-)
 delete mode 100644 util/migrate/backfill_image_fields.py
 delete mode 100644 util/migrate/backfill_v1_metadata.py

diff --git a/buildman/jobutil/buildjob.py b/buildman/jobutil/buildjob.py
index dbbb8113f..f6291f62b 100644
--- a/buildman/jobutil/buildjob.py
+++ b/buildman/jobutil/buildjob.py
@@ -128,9 +128,10 @@ class BuildJob(object):
         return False
 
       full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step]
-      logger.debug('Checking step #%s: %s, %s == %s', step, image.id, image.command, full_command)
+      logger.debug('Checking step #%s: %s, %s == %s', step, image.id,
+                   image.storage.command, full_command)
 
-      return image.command == full_command
+      return image.storage.command == full_command
 
     path = tree.find_longest_path(base_image.id, checker)
     if not path:
diff --git a/data/model/image.py b/data/model/image.py
index 82953cd86..00707d176 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -79,14 +79,7 @@ def get_repository_images_base(namespace_name, repository_name, query_modifier):
   query = query_modifier(query)
 
-  return invert_placement_query_results(query)
-
-
-def invert_placement_query_results(placement_query):
-  """ This method will take a query which returns placements, storages, and images, and have it
-      return images and their storages, along with the placement set on each storage.
-  """
-  location_list = list(placement_query)
+  location_list = list(query)
 
   images = {}
   for location in location_list:
 
@@ -341,6 +334,7 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size,
   try:
     # TODO(jschorr): Switch to this faster route once we have full ancestor aggregate_size
     # parent_image = Image.get(Image.id == ancestors[-1])
+    # total_size = image_size + parent_image.storage.aggregate_size
     ancestor_size = (ImageStorage
                      .select(fn.Sum(ImageStorage.image_size))
                      .join(Image)
@@ -349,7 +343,6 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size,
 
     # TODO stop writing to storage when all readers are removed
     if ancestor_size is not None:
-      # total_size = image_size + parent_image.storage.aggregate_size
       total_size = ancestor_size + image_size
       image.storage.aggregate_size = total_size
       image.aggregate_size = total_size
diff --git a/digest/checksums.py b/digest/checksums.py
index ea30e4dc1..154907823 100644
--- a/digest/checksums.py
+++ b/digest/checksums.py
@@ -68,7 +68,7 @@ def compute_tarsum(fp, json_data):
 
 
 def simple_checksum_handler(json_data):
-  h = hashlib.sha256(json_data.encode('utf8') + '\n')
+  h = hashlib.sha256(json_data + '\n')
 
   def fn(buf):
     h.update(buf)
diff --git a/endpoints/api/image.py b/endpoints/api/image.py
index 057e02cae..014bed412 100644
--- a/endpoints/api/image.py
+++ b/endpoints/api/image.py
@@ -12,7 +12,11 @@ from util.cache import cache_control_flask_restful
 
 
 def image_view(image, image_map, include_ancestors=True):
-  command = image.command
+  extended_props = image
+  if image.storage and image.storage.id:
+    extended_props = image.storage
+
+  command = extended_props.command
 
   def docker_id(aid):
     if not aid or not aid in image_map:
@@ -22,10 +26,10 @@
   image_data = {
     'id': image.docker_image_id,
-    'created': format_date(image.created),
-    'comment': image.comment,
+    'created': format_date(extended_props.created),
+    'comment': extended_props.comment,
     'command': json.loads(command) if command else None,
-    'size': image.storage.image_size,
+    'size': extended_props.image_size,
     'uploading': image.storage.uploading,
     'sort_index': len(image.ancestors),
   }
 
diff --git a/endpoints/api/repository.py b/endpoints/api/repository.py
index b9664864e..d177403c5 100644
--- a/endpoints/api/repository.py
+++ b/endpoints/api/repository.py
@@ -254,7 +254,7 @@ class Repository(RepositoryParamResource):
       tag_info = {
         'name': tag.name,
         'image_id': tag.image.docker_image_id,
-        'size': tag.image.aggregate_size
+        'size': tag.image.storage.aggregate_size
       }
 
       if tag.lifetime_start_ts > 0:
diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py
index 636da6f30..082a07864 100644
--- a/endpoints/v1/registry.py
+++ b/endpoints/v1/registry.py
@@ -211,10 +211,11 @@ def put_image_layer(namespace, repository, image_id):
   try:
     logger.debug('Retrieving image data')
     uuid = repo_image.storage.uuid
-    json_data = repo_image.v1_json_metadata
-  except (AttributeError):
+    json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
+  except (IOError, AttributeError):
     logger.exception('Exception when retrieving image data')
-    abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id)
+    abort(404, 'Image %(image_id)s not found', issue='unknown-image',
+          image_id=image_id)
 
   logger.debug('Retrieving image path info')
   layer_path = store.image_layer_path(uuid)
@@ -331,7 +332,8 @@ def put_image_checksum(namespace, repository, image_id):
     abort(404,
          'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)
 
   logger.debug('Looking up repo layer data')
-  if not repo_image.v1_json_metadata:
+  uuid = repo_image.storage.uuid
+  if not store.exists(repo_image.storage.locations, store.image_json_path(uuid)):
     abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)
 
   logger.debug('Marking image path')
@@ -371,7 +373,12 @@ def get_image_json(namespace, repository, image_id, headers):
   logger.debug('Looking up repo image')
   repo_image = model.image.get_repo_image_extended(namespace, repository, image_id)
-  if repo_image is None:
+
+  logger.debug('Looking up repo layer data')
+  try:
+    uuid = repo_image.storage.uuid
+    data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
+  except (IOError, AttributeError):
     flask_abort(404)
 
   logger.debug('Looking up repo layer size')
@@ -381,7 +388,7 @@
   # so handle this case rather than failing.
   headers['X-Docker-Size'] = str(size)
 
-  response = make_response(repo_image.v1_json_metadata, 200)
+  response = make_response(data, 200)
 
   response.headers.extend(headers)
   return response
@@ -484,6 +491,8 @@ def put_image_json(namespace, repository, image_id):
     model.tag.create_temporary_hidden_tag(repo, repo_image,
                                           app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'])
 
+  uuid = repo_image.storage.uuid
+
   if image_id != data['id']:
     abort(400, 'JSON data contains invalid id for image: %(image_id)s',
           issue='invalid-request', image_id=image_id)
@@ -501,12 +510,17 @@
   if parent_id:
     logger.debug('Looking up parent image data')
-    if parent_id and not parent_image.v1_json_metadata:
+    if (parent_id and not
+        store.exists(parent_locations, store.image_json_path(parent_uuid))):
       abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s',
             issue='invalid-request', image_id=image_id, parent_id=parent_id)
 
+  logger.debug('Looking up image storage paths')
+  json_path = store.image_json_path(uuid)
+
   logger.debug('Checking if image already exists')
-  if repo_image.v1_json_metadata and not image_is_uploading(repo_image):
+  if (store.exists(repo_image.storage.locations, json_path) and not
+      image_is_uploading(repo_image)):
     exact_abort(409, 'Image already exists')
 
   set_uploading_flag(repo_image, True)
@@ -522,8 +536,6 @@
                                       data.get('comment'), command, v1_metadata, parent_image)
 
   logger.debug('Putting json path')
-  uuid = repo_image.storage.uuid
-  json_path = store.image_json_path(uuid)
   store.put_content(repo_image.storage.locations, json_path, request.data)
 
   logger.debug('Generating image ancestry')
diff --git a/endpoints/verbs.py b/endpoints/verbs.py
index 6201bd897..be0067d1d 100644
--- a/endpoints/verbs.py
+++ b/endpoints/verbs.py
@@ -114,15 +114,17 @@ def _verify_repo_verb(store, namespace, repository, tag, verb, checker=None):
     abort(404)
 
   # Lookup the tag's image and storage.
-  repo_image = model.image.get_repo_image(namespace, repository, tag_image.docker_image_id)
+  repo_image = model.image.get_repo_image_extended(namespace, repository, tag_image.docker_image_id)
   if not repo_image:
     abort(404)
 
   # If there is a data checker, call it first.
+  uuid = repo_image.storage.uuid
   image_json = None
   if checker is not None:
-    image_json = json.loads(repo_image.v1_json_metadata)
+    image_json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
+    image_json = json.loads(image_json_data)
 
     if not checker(image_json):
       logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb)
 
@@ -191,7 +193,8 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
 
   # Load the image's JSON layer.
   if not image_json:
-    image_json = json.loads(repo_image.v1_json_metadata)
+    image_json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
+    image_json = json.loads(image_json_data)
 
   # Calculate a synthetic image ID.
   synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest()
diff --git a/util/migrate/backfill_aggregate_sizes.py b/util/migrate/backfill_aggregate_sizes.py
index db6f195f2..8e624753b 100644
--- a/util/migrate/backfill_aggregate_sizes.py
+++ b/util/migrate/backfill_aggregate_sizes.py
@@ -1,50 +1,44 @@
 import logging
 
-from data.database import ImageStorage, Image, db, db_for_update
+from data.database import ImageStorage, Image, db
 from app import app
 
-
-logger = logging.getLogger(__name__)
-
+LOGGER = logging.getLogger(__name__)
 
 def backfill_aggregate_sizes():
   """ Generates aggregate sizes for any image storage entries without them """
-  logger.debug('Aggregate sizes backfill: Began execution')
+  LOGGER.setLevel(logging.DEBUG)
+  LOGGER.debug('Aggregate sizes backfill: Began execution')
 
   while True:
-    batch_image_ids = list(Image
-                           .select(Image.id)
-                           .where(Image.aggregate_size >> None)
-                           .limit(100))
+    batch_storage_ids = list(ImageStorage
+                             .select(ImageStorage.id)
+                             .where(ImageStorage.aggregate_size >> None)
+                             .limit(10))
 
-    if len(batch_image_ids) == 0:
+    if len(batch_storage_ids) == 0:
       # There are no storages left to backfill. We're done!
-      logger.debug('Aggregate sizes backfill: Backfill completed')
+      LOGGER.debug('Aggregate sizes backfill: Backfill completed')
       return
 
-    logger.debug('Aggregate sizes backfill: Found %s records to update', len(batch_image_ids))
-    for image_id in batch_image_ids:
-      logger.debug('Updating image : %s', image_id.id)
+    LOGGER.debug('Aggregate sizes backfill: Found %s records to update', len(batch_storage_ids))
+    for image_storage_id in batch_storage_ids:
+      LOGGER.debug('Updating image storage: %s', image_storage_id.id)
 
       with app.config['DB_TRANSACTION_FACTORY'](db):
         try:
-          image = (Image
-                   .select(Image, ImageStorage)
-                   .join(ImageStorage)
-                   .where(Image.id == image_id)
-                   .get())
-
-          aggregate_size = image.storage.image_size
+          storage = ImageStorage.select().where(ImageStorage.id == image_storage_id.id).get()
+          image = Image.select().where(Image.storage == storage).get()
 
           image_ids = image.ancestors.split('/')[1:-1]
+          aggregate_size = storage.image_size
           for image_id in image_ids:
-            to_add = db_for_update(Image
-                                   .select(Image, ImageStorage)
-                                   .join(ImageStorage)
-                                   .where(Image.id == image_id)).get()
-            aggregate_size += to_add.storage.image_size
+            current_image = Image.select().where(Image.id == image_id).join(ImageStorage)
+            aggregate_size += image.storage.image_size
 
-          image.aggregate_size = aggregate_size
-          image.save()
+          storage.aggregate_size = aggregate_size
+          storage.save()
+        except ImageStorage.DoesNotExist:
+          pass
         except Image.DoesNotExist:
           pass
diff --git a/util/migrate/backfill_image_fields.py b/util/migrate/backfill_image_fields.py
deleted file mode 100644
index 184cc8a42..000000000
--- a/util/migrate/backfill_image_fields.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import logging
-
-from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
-                    TextField)
-from data.database import BaseModel, db, db_for_update
-from app import app
-
-
-logger = logging.getLogger(__name__)
-
-
-class Repository(BaseModel):
-  pass
-
-
-# Vendor the information from tables we will be writing to at the time of this migration
-class ImageStorage(BaseModel):
-  created = DateTimeField(null=True)
-  comment = TextField(null=True)
-  command = TextField(null=True)
-  aggregate_size = BigIntegerField(null=True)
-  uploading = BooleanField(default=True, null=True)
-
-
-class Image(BaseModel):
-  # This class is intentionally denormalized. Even though images are supposed
-  # to be globally unique we can't treat them as such for permissions and
-  # security reasons. So rather than Repository <-> Image being many to many
-  # each image now belongs to exactly one repository.
-  docker_image_id = CharField(index=True)
-  repository = ForeignKeyField(Repository)
-
-  # '/' separated list of ancestory ids, e.g. /1/2/6/7/10/
-  ancestors = CharField(index=True, default='/', max_length=64535, null=True)
-
-  storage = ForeignKeyField(ImageStorage, index=True, null=True)
-
-  created = DateTimeField(null=True)
-  comment = TextField(null=True)
-  command = TextField(null=True)
-  aggregate_size = BigIntegerField(null=True)
-  v1_json_metadata = TextField(null=True)
-
-
-def backfill_image_fields():
-  """ Copies metadata from image storages to their images. """
-  logger.debug('Image metadata backfill: Began execution')
-  while True:
-    batch_image_ids = list(Image
-                           .select(Image.id)
-                           .join(ImageStorage)
-                           .where(Image.created >> None, Image.comment >> None,
-                                  Image.command >> None, Image.aggregate_size >> None,
-                                  ImageStorage.uploading == False,
-                                  ~((ImageStorage.created >> None) &
-                                    (ImageStorage.comment >> None) &
-                                    (ImageStorage.command >> None) &
-                                    (ImageStorage.aggregate_size >> None)))
-                           .limit(100))
-
-    if len(batch_image_ids) == 0:
-      logger.debug('Image metadata backfill: Backfill completed')
-      return
-
-    logger.debug('Image metadata backfill: Found %s records to update', len(batch_image_ids))
-    for image_id in batch_image_ids:
-      logger.debug('Updating image: %s', image_id.id)
-
-      with app.config['DB_TRANSACTION_FACTORY'](db):
-        try:
-          image = db_for_update(Image
-                                .select(Image, ImageStorage)
-                                .join(ImageStorage)
-                                .where(Image.id == image_id.id)).get()
-
-          image.created = image.storage.created
-          image.comment = image.storage.comment
-          image.command = image.storage.command
-          image.aggregate_size = image.storage.aggregate_size
-          image.save()
-        except Image.DoesNotExist:
-          pass
-
-if __name__ == "__main__":
-  logging.basicConfig(level=logging.DEBUG)
-  logging.getLogger('peewee').setLevel(logging.CRITICAL)
-  backfill_image_fields()
diff --git a/util/migrate/backfill_v1_metadata.py b/util/migrate/backfill_v1_metadata.py
deleted file mode 100644
index be7a37c93..000000000
--- a/util/migrate/backfill_v1_metadata.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import logging
-
-from peewee import JOIN_LEFT_OUTER
-
-from data.database import (Image, ImageStorage, ImageStoragePlacement, ImageStorageLocation, db,
-                           db_for_update)
-from app import app, storage
-from data import model
-
-
-logger = logging.getLogger(__name__)
-
-
-def backfill_v1_metadata():
-  """ Copies metadata from image storages to their images. """
-  logger.debug('Image v1 metadata backfill: Began execution')
-  while True:
-    batch_image_ids = list(Image
-                           .select(Image.id)
-                           .join(ImageStorage)
-                           .where(Image.v1_json_metadata >> None, ImageStorage.uploading == False)
-                           .limit(100))
-
-    if len(batch_image_ids) == 0:
-      logger.debug('Image v1 metadata backfill: Backfill completed')
-      return
-
-    logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
-    for one_id in batch_image_ids:
-      with app.config['DB_TRANSACTION_FACTORY'](db):
-        try:
-          logger.debug('Loading image: %s', one_id.id)
-
-          raw_query = (ImageStoragePlacement
-                       .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
-                       .join(ImageStorageLocation)
-                       .switch(ImageStoragePlacement)
-                       .join(ImageStorage, JOIN_LEFT_OUTER)
-                       .join(Image)
-                       .where(Image.id == one_id.id))
-
-          placement_query = db_for_update(raw_query)
-
-          repo_image_list = model.image.invert_placement_query_results(placement_query)
-          if len(repo_image_list) > 1:
-            logger.error('Found more images than we requested, something is wrong with the query')
-            return
-
-          repo_image = repo_image_list[0]
-          uuid = repo_image.storage.uuid
-          json_path = storage.image_json_path(uuid)
-
-          logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
-          try:
-            data = storage.get_content(repo_image.storage.locations, json_path)
-          except IOError:
-            data = None
-            logger.exception('failed to find v1 metadata, defaulting to None')
-          repo_image.v1_json_metadata = data
-          repo_image.save()
-        except ImageStoragePlacement.DoesNotExist:
-          pass
-
-if __name__ == "__main__":
-  logging.basicConfig(level=logging.DEBUG)
-  # logging.getLogger('peewee').setLevel(logging.CRITICAL)
-  backfill_v1_metadata()