Merge pull request #557 from coreos-inc/revert-migration

Revert "Merge pull request #491 from jakedt/migratebackp2"
This commit is contained in:
Silas Sewell 2015-09-28 16:13:50 -04:00
commit c4f938898a
11 changed files with 66 additions and 232 deletions

View file

@ -128,9 +128,10 @@ class BuildJob(object):
return False return False
full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step] full_command = '["/bin/sh", "-c", "%s"]' % cache_commands[step]
logger.debug('Checking step #%s: %s, %s == %s', step, image.id, image.command, full_command) logger.debug('Checking step #%s: %s, %s == %s', step, image.id,
image.storage.command, full_command)
return image.command == full_command return image.storage.command == full_command
path = tree.find_longest_path(base_image.id, checker) path = tree.find_longest_path(base_image.id, checker)
if not path: if not path:

View file

@ -1,24 +0,0 @@
"""Backfill image fields from image storages
Revision ID: 2e0380215d01
Revises: 3ff4fbc94644
Create Date: 2015-09-15 16:57:42.850246
"""
# revision identifiers, used by Alembic.
revision = '2e0380215d01'
down_revision = '3ff4fbc94644'
from alembic import op
import sqlalchemy as sa
from util.migrate.backfill_image_fields import backfill_image_fields
from util.migrate.backfill_v1_metadata import backfill_v1_metadata
def upgrade(tables):
backfill_image_fields()
backfill_v1_metadata()
def downgrade(tables):
pass

View file

@ -79,14 +79,7 @@ def get_repository_images_base(namespace_name, repository_name, query_modifier):
query = query_modifier(query) query = query_modifier(query)
return invert_placement_query_results(query) location_list = list(query)
def invert_placement_query_results(placement_query):
""" This method will take a query which returns placements, storages, and images, and have it
return images and their storages, along with the placement set on each storage.
"""
location_list = list(placement_query)
images = {} images = {}
for location in location_list: for location in location_list:
@ -333,7 +326,7 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size,
try: try:
# TODO(jschorr): Switch to this faster route once we have full ancestor aggregate_size # TODO(jschorr): Switch to this faster route once we have full ancestor aggregate_size
# parent_image = Image.get(Image.id == ancestors[-1]) # parent_image = Image.get(Image.id == ancestors[-1])
# total_size = image_size + parent_image.aggregate_size # total_size = image_size + parent_image.storage.aggregate_size
total_size = (ImageStorage total_size = (ImageStorage
.select(fn.Sum(ImageStorage.image_size)) .select(fn.Sum(ImageStorage.image_size))
.join(Image) .join(Image)

View file

@ -68,7 +68,7 @@ def compute_tarsum(fp, json_data):
def simple_checksum_handler(json_data): def simple_checksum_handler(json_data):
h = hashlib.sha256(json_data.encode('utf8') + '\n') h = hashlib.sha256(json_data + '\n')
def fn(buf): def fn(buf):
h.update(buf) h.update(buf)

View file

@ -12,7 +12,11 @@ from util.cache import cache_control_flask_restful
def image_view(image, image_map, include_ancestors=True): def image_view(image, image_map, include_ancestors=True):
command = image.command extended_props = image
if image.storage and image.storage.id:
extended_props = image.storage
command = extended_props.command
def docker_id(aid): def docker_id(aid):
if not aid or not aid in image_map: if not aid or not aid in image_map:
@ -22,10 +26,10 @@ def image_view(image, image_map, include_ancestors=True):
image_data = { image_data = {
'id': image.docker_image_id, 'id': image.docker_image_id,
'created': format_date(image.created), 'created': format_date(extended_props.created),
'comment': image.comment, 'comment': extended_props.comment,
'command': json.loads(command) if command else None, 'command': json.loads(command) if command else None,
'size': image.storage.image_size, 'size': extended_props.image_size,
'uploading': image.storage.uploading, 'uploading': image.storage.uploading,
'sort_index': len(image.ancestors), 'sort_index': len(image.ancestors),
} }

View file

@ -248,7 +248,7 @@ class Repository(RepositoryParamResource):
tag_info = { tag_info = {
'name': tag.name, 'name': tag.name,
'image_id': tag.image.docker_image_id, 'image_id': tag.image.docker_image_id,
'size': tag.image.aggregate_size 'size': tag.image.storage.aggregate_size
} }
if tag.lifetime_start_ts > 0: if tag.lifetime_start_ts > 0:

View file

@ -211,10 +211,11 @@ def put_image_layer(namespace, repository, image_id):
try: try:
logger.debug('Retrieving image data') logger.debug('Retrieving image data')
uuid = repo_image.storage.uuid uuid = repo_image.storage.uuid
json_data = repo_image.v1_json_metadata json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
except (AttributeError): except (IOError, AttributeError):
logger.exception('Exception when retrieving image data') logger.exception('Exception when retrieving image data')
abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) abort(404, 'Image %(image_id)s not found', issue='unknown-image',
image_id=image_id)
logger.debug('Retrieving image path info') logger.debug('Retrieving image path info')
layer_path = store.image_layer_path(uuid) layer_path = store.image_layer_path(uuid)
@ -330,8 +331,10 @@ def put_image_checksum(namespace, repository, image_id):
if not repo_image or not repo_image.storage: if not repo_image or not repo_image.storage:
abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)
uuid = repo_image.storage.uuid
logger.debug('Looking up repo layer data') logger.debug('Looking up repo layer data')
if not repo_image.v1_json_metadata: if not store.exists(repo_image.storage.locations, store.image_json_path(uuid)):
abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)
logger.debug('Marking image path') logger.debug('Marking image path')
@ -371,14 +374,19 @@ def get_image_json(namespace, repository, image_id, headers):
logger.debug('Looking up repo image') logger.debug('Looking up repo image')
repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) repo_image = model.image.get_repo_image_extended(namespace, repository, image_id)
if repo_image is None:
logger.debug('Looking up repo layer data')
try:
uuid = repo_image.storage.uuid
data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
except (IOError, AttributeError):
flask_abort(404) flask_abort(404)
logger.debug('Looking up repo layer size') logger.debug('Looking up repo layer size')
size = repo_image.storage.image_size size = repo_image.storage.image_size
headers['X-Docker-Size'] = str(size) headers['X-Docker-Size'] = str(size)
response = make_response(repo_image.v1_json_metadata, 200) response = make_response(data, 200)
response.headers.extend(headers) response.headers.extend(headers)
return response return response
@ -481,6 +489,8 @@ def put_image_json(namespace, repository, image_id):
model.tag.create_temporary_hidden_tag(repo, repo_image, model.tag.create_temporary_hidden_tag(repo, repo_image,
app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'])
uuid = repo_image.storage.uuid
if image_id != data['id']: if image_id != data['id']:
abort(400, 'JSON data contains invalid id for image: %(image_id)s', abort(400, 'JSON data contains invalid id for image: %(image_id)s',
issue='invalid-request', image_id=image_id) issue='invalid-request', image_id=image_id)
@ -498,12 +508,17 @@ def put_image_json(namespace, repository, image_id):
if parent_id: if parent_id:
logger.debug('Looking up parent image data') logger.debug('Looking up parent image data')
if parent_id and not parent_image.v1_json_metadata: if (parent_id and not
store.exists(parent_locations, store.image_json_path(parent_uuid))):
abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s',
issue='invalid-request', image_id=image_id, parent_id=parent_id) issue='invalid-request', image_id=image_id, parent_id=parent_id)
logger.debug('Looking up image storage paths')
json_path = store.image_json_path(uuid)
logger.debug('Checking if image already exists') logger.debug('Checking if image already exists')
if repo_image.v1_json_metadata and not image_is_uploading(repo_image): if (store.exists(repo_image.storage.locations, json_path) and not
image_is_uploading(repo_image)):
exact_abort(409, 'Image already exists') exact_abort(409, 'Image already exists')
set_uploading_flag(repo_image, True) set_uploading_flag(repo_image, True)
@ -519,8 +534,6 @@ def put_image_json(namespace, repository, image_id):
data.get('comment'), command, v1_metadata, parent_image) data.get('comment'), command, v1_metadata, parent_image)
logger.debug('Putting json path') logger.debug('Putting json path')
uuid = repo_image.storage.uuid
json_path = store.image_json_path(uuid)
store.put_content(repo_image.storage.locations, json_path, request.data) store.put_content(repo_image.storage.locations, json_path, request.data)
logger.debug('Generating image ancestry') logger.debug('Generating image ancestry')

View file

@ -114,15 +114,17 @@ def _verify_repo_verb(store, namespace, repository, tag, verb, checker=None):
abort(404) abort(404)
# Lookup the tag's image and storage. # Lookup the tag's image and storage.
repo_image = model.image.get_repo_image(namespace, repository, tag_image.docker_image_id) repo_image = model.image.get_repo_image_extended(namespace, repository, tag_image.docker_image_id)
if not repo_image: if not repo_image:
abort(404) abort(404)
# If there is a data checker, call it first. # If there is a data checker, call it first.
uuid = repo_image.storage.uuid
image_json = None image_json = None
if checker is not None: if checker is not None:
image_json = json.loads(repo_image.v1_json_metadata) image_json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
image_json = json.loads(image_json_data)
if not checker(image_json): if not checker(image_json):
logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb) logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb)
@ -191,7 +193,8 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
# Load the image's JSON layer. # Load the image's JSON layer.
if not image_json: if not image_json:
image_json = json.loads(repo_image.v1_json_metadata) image_json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid))
image_json = json.loads(image_json_data)
# Calculate a synthetic image ID. # Calculate a synthetic image ID.
synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest() synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest()

View file

@ -1,50 +1,44 @@
import logging import logging
from data.database import ImageStorage, Image, db, db_for_update from data.database import ImageStorage, Image, db
from app import app from app import app
LOGGER = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
def backfill_aggregate_sizes(): def backfill_aggregate_sizes():
""" Generates aggregate sizes for any image storage entries without them """ """ Generates aggregate sizes for any image storage entries without them """
logger.debug('Aggregate sizes backfill: Began execution') LOGGER.setLevel(logging.DEBUG)
LOGGER.debug('Aggregate sizes backfill: Began execution')
while True: while True:
batch_image_ids = list(Image batch_storage_ids = list(ImageStorage
.select(Image.id) .select(ImageStorage.id)
.where(Image.aggregate_size >> None) .where(ImageStorage.aggregate_size >> None)
.limit(100)) .limit(10))
if len(batch_image_ids) == 0: if len(batch_storage_ids) == 0:
# There are no storages left to backfill. We're done! # There are no storages left to backfill. We're done!
logger.debug('Aggregate sizes backfill: Backfill completed') LOGGER.debug('Aggregate sizes backfill: Backfill completed')
return return
logger.debug('Aggregate sizes backfill: Found %s records to update', len(batch_image_ids)) LOGGER.debug('Aggregate sizes backfill: Found %s records to update', len(batch_storage_ids))
for image_id in batch_image_ids: for image_storage_id in batch_storage_ids:
logger.debug('Updating image : %s', image_id.id) LOGGER.debug('Updating image storage: %s', image_storage_id.id)
with app.config['DB_TRANSACTION_FACTORY'](db): with app.config['DB_TRANSACTION_FACTORY'](db):
try: try:
image = (Image storage = ImageStorage.select().where(ImageStorage.id == image_storage_id.id).get()
.select(Image, ImageStorage) image = Image.select().where(Image.storage == storage).get()
.join(ImageStorage)
.where(Image.id == image_id)
.get())
aggregate_size = image.storage.image_size
image_ids = image.ancestors.split('/')[1:-1] image_ids = image.ancestors.split('/')[1:-1]
aggregate_size = storage.image_size
for image_id in image_ids: for image_id in image_ids:
to_add = db_for_update(Image current_image = Image.select().where(Image.id == image_id).join(ImageStorage)
.select(Image, ImageStorage) aggregate_size += image.storage.image_size
.join(ImageStorage)
.where(Image.id == image_id)).get()
aggregate_size += to_add.storage.image_size
image.aggregate_size = aggregate_size storage.aggregate_size = aggregate_size
image.save() storage.save()
except ImageStorage.DoesNotExist:
pass
except Image.DoesNotExist: except Image.DoesNotExist:
pass pass

View file

@ -1,87 +0,0 @@
import logging
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
TextField)
from data.database import BaseModel, db, db_for_update
from app import app
logger = logging.getLogger(__name__)
class Repository(BaseModel):
pass
# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
created = DateTimeField(null=True)
comment = TextField(null=True)
command = TextField(null=True)
aggregate_size = BigIntegerField(null=True)
uploading = BooleanField(default=True, null=True)
class Image(BaseModel):
# This class is intentionally denormalized. Even though images are supposed
# to be globally unique we can't treat them as such for permissions and
# security reasons. So rather than Repository <-> Image being many to many
# each image now belongs to exactly one repository.
docker_image_id = CharField(index=True)
repository = ForeignKeyField(Repository)
# '/' separated list of ancestory ids, e.g. /1/2/6/7/10/
ancestors = CharField(index=True, default='/', max_length=64535, null=True)
storage = ForeignKeyField(ImageStorage, index=True, null=True)
created = DateTimeField(null=True)
comment = TextField(null=True)
command = TextField(null=True)
aggregate_size = BigIntegerField(null=True)
v1_json_metadata = TextField(null=True)
def backfill_image_fields():
""" Copies metadata from image storages to their images. """
logger.debug('Image metadata backfill: Began execution')
while True:
batch_image_ids = list(Image
.select(Image.id)
.join(ImageStorage)
.where(Image.created >> None, Image.comment >> None,
Image.command >> None, Image.aggregate_size >> None,
ImageStorage.uploading == False,
~((ImageStorage.created >> None) &
(ImageStorage.comment >> None) &
(ImageStorage.command >> None) &
(ImageStorage.aggregate_size >> None)))
.limit(100))
if len(batch_image_ids) == 0:
logger.debug('Image metadata backfill: Backfill completed')
return
logger.debug('Image metadata backfill: Found %s records to update', len(batch_image_ids))
for image_id in batch_image_ids:
logger.debug('Updating image: %s', image_id.id)
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
image = db_for_update(Image
.select(Image, ImageStorage)
.join(ImageStorage)
.where(Image.id == image_id.id)).get()
image.created = image.storage.created
image.comment = image.storage.comment
image.command = image.storage.command
image.aggregate_size = image.storage.aggregate_size
image.save()
except Image.DoesNotExist:
pass
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('peewee').setLevel(logging.CRITICAL)
backfill_image_fields()

View file

@ -1,63 +0,0 @@
import logging
from peewee import JOIN_LEFT_OUTER
from data.database import (Image, ImageStorage, ImageStoragePlacement, ImageStorageLocation, db,
db_for_update)
from app import app, storage
from data import model
logger = logging.getLogger(__name__)
def backfill_v1_metadata():
""" Copies metadata from image storages to their images. """
logger.debug('Image v1 metadata backfill: Began execution')
while True:
batch_image_ids = list(Image
.select(Image.id)
.join(ImageStorage)
.where(Image.v1_json_metadata >> None, ImageStorage.uploading == False)
.limit(100))
if len(batch_image_ids) == 0:
logger.debug('Image v1 metadata backfill: Backfill completed')
return
logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
for one_id in batch_image_ids:
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
logger.debug('Loading image: %s', one_id.id)
raw_query = (ImageStoragePlacement
.select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER)
.join(Image)
.where(Image.id == one_id.id))
placement_query = db_for_update(raw_query)
repo_image_list = model.image.invert_placement_query_results(placement_query)
if len(repo_image_list) > 1:
logger.error('Found more images than we requested, something is wrong with the query')
return
repo_image = repo_image_list[0]
uuid = repo_image.storage.uuid
json_path = storage.image_json_path(uuid)
logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
data = storage.get_content(repo_image.storage.locations, json_path)
repo_image.v1_json_metadata = data
repo_image.save()
except ImageStoragePlacement.DoesNotExist:
pass
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
# logging.getLogger('peewee').setLevel(logging.CRITICAL)
backfill_v1_metadata()