Actually store the generated image storage in the database, and allow it to be garbage collected when the parent image storage is collected.

Jake Moshenko 2014-09-18 17:26:40 -04:00
parent 43555af63d
commit 11bb8e6448
5 changed files with 120 additions and 50 deletions
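
In short: the squashed image is no longer addressed by a secret-keyed hash written straight to object storage; it gets its own ImageStorage row, linked to its parent through the new DerivedImageStorage table, so the garbage collector can find and remove it. A minimal sketch of the intended lifecycle, using the helpers added in this commit (the wrapper function and call sites below are illustrative, not part of the diff):

# Illustrative lifecycle of a derived (squashed) storage after this commit.
# Assumes the data-model helpers added in the diff below.
from data import model

def derived_storage_lifecycle(repo_image, preferred_location):
    # 1. Find or create the derivative; a DerivedImageStorage row records
    #    (source, derivative, transformation='squash').
    derived = model.find_or_create_derived_storage(repo_image.storage, 'squash',
                                                   preferred_location)

    # 2. After the squashed layer has been streamed to storage, the uploading
    #    flag is cleared so later requests can redirect to the stored copy.
    done = model.get_storage_by_uuid(derived.uuid)
    done.uploading = False
    done.save()

    # 3. When garbage_collect_repository removes the source ImageStorage,
    #    the nullable DerivedImageStorage.source FK is set to NULL and a
    #    second GC pass deletes the orphaned derivative as well.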

View file

@@ -192,7 +192,6 @@ class PermissionPrototype(BaseModel):
)
class AccessToken(BaseModel):
friendly_name = CharField(null=True)
code = CharField(default=random_string_generator(length=64), unique=True,
@@ -238,6 +237,23 @@ class ImageStorage(BaseModel):
uploading = BooleanField(default=True, null=True)
class ImageStorageTransformation(BaseModel):
name = CharField(index=True, unique=True)
class DerivedImageStorage(BaseModel):
source = ForeignKeyField(ImageStorage, null=True, related_name='source')
derivative = ForeignKeyField(ImageStorage, related_name='derivative')
transformation = ForeignKeyField(ImageStorageTransformation)
class Meta:
database = db
read_slaves = (read_slave,)
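# The trailing True marks the (source, transformation) index as unique:
# each source storage gets at most one derivative per transformation.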
indexes = (
(('source', 'transformation'), True),
)
class ImageStorageLocation(BaseModel):
name = CharField(unique=True, index=True)
@@ -422,4 +438,4 @@ all_models = [User, Repository, Image, AccessToken, Role, RepositoryPermission,
OAuthApplication, OAuthAuthorizationCode, OAuthAccessToken, NotificationKind,
Notification, ImageStorageLocation, ImageStoragePlacement,
ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification,
RepositoryAuthorizedEmail]
RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage]

View file

@@ -70,6 +70,10 @@ class InvalidBuildTriggerException(DataModelException):
pass
class InvalidImageException(DataModelException):
pass
class TooManyUsersException(DataModelException):
pass
@@ -1055,6 +1059,14 @@ def __translate_ancestry(old_ancestry, translations, repository, username, prefe
return '/%s/' % '/'.join(new_ids)
def _create_storage(location_name):
storage = ImageStorage.create()
location = ImageStorageLocation.get(name=location_name)
ImageStoragePlacement.create(location=location, storage=storage)
storage.locations = {location_name}
return storage
def find_create_or_link_image(docker_image_id, repository, username, translations,
preferred_location):
with config.app_config['DB_TRANSACTION_FACTORY'](db):
@@ -1093,10 +1105,7 @@ def find_create_or_link_image(docker_image_id, repository, username, translation
origin_image_id = to_copy.id
except Image.DoesNotExist:
logger.debug('Creating new storage for docker id: %s', docker_image_id)
storage = ImageStorage.create()
location = ImageStorageLocation.get(name=preferred_location)
ImageStoragePlacement.create(location=location, storage=storage)
storage.locations = {preferred_location}
storage = _create_storage(preferred_location)
logger.debug('Storage locations: %s', storage.locations)
@@ -1114,6 +1123,43 @@ def find_create_or_link_image(docker_image_id, repository, username, translation
return new_image
def find_or_create_derived_storage(source, transformation_name, preferred_location):
try:
found = (ImageStorage
.select(ImageStorage, DerivedImageStorage)
.join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative))
.join(ImageStorageTransformation)
.where(DerivedImageStorage.source == source,
ImageStorageTransformation.name == transformation_name)
.get())
found.locations = {placement.location.name for placement in found.imagestorageplacement_set}
return found
except ImageStorage.DoesNotExist:
logger.debug('Creating storage derived from source: %s', source.uuid)
trans = ImageStorageTransformation.get(name=transformation_name)
new_storage = _create_storage(preferred_location)
DerivedImageStorage.create(source=source, derivative=new_storage, transformation=trans)
return new_storage
def get_storage_by_uuid(storage_uuid):
placements = list(ImageStoragePlacement
.select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage)
.where(ImageStorage.uuid == storage_uuid))
if not placements:
raise InvalidImageException('No storage found with uuid: %s' % storage_uuid)
found = placements[0].storage
found.locations = {placement.location.name for placement in placements}
return found
def set_image_size(docker_image_id, namespace_name, repository_name,
image_size):
try:
@@ -1252,15 +1298,8 @@ def garbage_collect_repository(namespace_name, repository_name):
image_to_remove.delete_instance()
if uuids_to_check_for_gc:
storage_to_remove = (ImageStorage
.select()
.join(Image, JOIN_LEFT_OUTER)
.group_by(ImageStorage)
.where(ImageStorage.uuid << list(uuids_to_check_for_gc))
.having(fn.Count(Image.id) == 0))
for storage in storage_to_remove:
def remove_storages(query):
for storage in query:
logger.debug('Garbage collecting image storage: %s', storage.uuid)
image_path = config.store.image_path(storage.uuid)
@@ -1269,7 +1308,24 @@ def garbage_collect_repository(namespace_name, repository_name):
placement.delete_instance()
config.store.remove({location_name}, image_path)
storage.delete_instance()
storage.delete_instance(recursive=True)
if uuids_to_check_for_gc:
storage_to_remove = (ImageStorage
.select()
.join(Image, JOIN_LEFT_OUTER)
.group_by(ImageStorage)
.where(ImageStorage.uuid << list(uuids_to_check_for_gc))
.having(fn.Count(Image.id) == 0))
remove_storages(storage_to_remove)
# Now remove any derived image storages whose sources have been removed
derived_storages_to_remove = (ImageStorage
.select()
.join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative))
.where(DerivedImageStorage.source >> None))
remove_storages(derived_storages_to_remove)
return len(to_remove)
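
For context (not part of the diff): remove_storages calls delete_instance(recursive=True), and because DerivedImageStorage.source is declared null=True, peewee nulls that column out rather than deleting the derivative row when its source storage is collected. The second query then sweeps up exactly those orphaned derivatives; '>> None' is peewee's IS NULL and '<<' is IN. A rough standalone sketch of that second pass, assuming the models added by this commit:

# Sketch only: count derivatives orphaned by a GC run.
# Assumes the peewee models added to data/database.py in this commit.
from data.database import ImageStorage, DerivedImageStorage

def orphaned_derivative_count():
    # Derivatives whose source storage has been collected have source == NULL.
    return (ImageStorage
            .select()
            .join(DerivedImageStorage,
                  on=(ImageStorage.id == DerivedImageStorage.derivative))
            .where(DerivedImageStorage.source >> None)
            .count())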

View file

@@ -1,26 +1,24 @@
import logging
import json
import hashlib
from flask import (make_response, request, session, Response, redirect,
Blueprint, abort, send_file, make_response)
from flask import redirect, Blueprint, abort, send_file
from app import storage as store, app
from auth.auth import process_auth
from auth.permissions import ReadRepositoryPermission
from data import model
from endpoints.registry import set_cache_headers
from data import database
from util.queuefile import QueueFile
from util.queueprocess import QueueProcess
from util.gzipwrap import GzipWrap
from util.streamlayerformat import StreamLayerMerger
from werkzeug.wsgi import wrap_file
verbs = Blueprint('verbs', __name__)
logger = logging.getLogger(__name__)
def _open_stream(namespace, repository, image_list):
def get_next_layer():
for current_image_id in image_list:
@@ -32,20 +30,25 @@ def _open_stream(namespace, repository, image_list):
logger.debug('Returning image layer %s: %s' % (current_image_id, current_image_path))
yield current_image_stream
database.configure(app.config)
stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator())
return stream.read
def _write_synthetic_image_to_storage(namespace, repository, locations,
synthetic_image_id, queue_file):
# TODO: make sure this synthetic image expires!
image_path = store.image_layer_path(synthetic_image_id)
store.stream_write(locations, image_path, queue_file)
def _write_synthetic_image_to_storage(linked_storage_uuid, linked_locations, queue_file):
image_path = store.image_layer_path(linked_storage_uuid)
store.stream_write(linked_locations, image_path, queue_file)
queue_file.close()
database.configure(app.config)
done_uploading = model.get_storage_by_uuid(linked_storage_uuid)
done_uploading.uploading = False
done_uploading.save()
@verbs.route('/<namespace>/<repository>/<tag>/squash', methods=['GET'])
@process_auth
@set_cache_headers
def get_squashed_tag(namespace, repository, tag, headers):
def get_squashed_tag(namespace, repository, tag):
permission = ReadRepositoryPermission(namespace, repository)
if permission.can() or model.repository_is_public(namespace, repository):
# Lookup the requested tag.
@@ -57,34 +60,26 @@ def get_squashed_tag(namespace, repository, tag, headers):
repo_image = model.get_repo_image(namespace, repository, tag_image.docker_image_id)
if not repo_image:
abort(404)
# Calculate a synthetic image ID by hashing the *image storage ID* with our
# secret. This is done to prevent the ID being guessable/overwritable by
# external pushes.
unhashed = str(repo_image.storage.id) + ':' + app.config['SECRET_KEY']
synthetic_image_id = hashlib.sha256(unhashed).hexdigest()
# Check to see if the synthetic image ID exists in storage. If so, we just return a 302.
logger.debug('Looking up synthetic image %s', synthetic_image_id)
locations = repo_image.storage.locations
saved_image_path = store.image_layer_path(synthetic_image_id)
if store.exists(locations, saved_image_path):
logger.debug('Synthetic image %s exists in storage', synthetic_image_id)
download_url = store.get_direct_download_url(locations, saved_image_path)
derived = model.find_or_create_derived_storage(repo_image.storage, 'squash',
store.preferred_locations[0])
if not derived.uploading:
logger.debug('Derived image %s exists in storage', derived.uuid)
derived_layer_path = store.image_layer_path(derived.uuid)
download_url = store.get_direct_download_url(derived.locations, derived_layer_path)
if download_url:
logger.debug('Redirecting to download URL for synthetic image %s', synthetic_image_id)
return redirect(download_url, code=302)
logger.debug('Redirecting to download URL for derived image %s', derived.uuid)
return redirect(download_url)
logger.debug('Sending cached synthetic image %s', synthetic_image_id)
return send_file(store.stream_read_file(locations, saved_image_path))
logger.debug('Sending cached derived image %s', derived.uuid)
return send_file(store.stream_read_file(derived.locations, derived_layer_path))
# Load the ancestry for the image.
logger.debug('Building and returning synthetic image %s', synthetic_image_id)
logger.debug('Building and returning derived image %s', derived.uuid)
uuid = repo_image.storage.uuid
ancestry_data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid))
full_image_list = json.loads(ancestry_data)
# Create a queue process to generate the data. The queue files will read from the process
# and send the results to the client and storage.
args = (namespace, repository, full_image_list)
@@ -92,12 +87,12 @@ def get_squashed_tag(namespace, repository, tag, headers):
client_queue_file = QueueFile(queue_process.create_queue(), 'client')
storage_queue_file = QueueFile(queue_process.create_queue(), 'storage')
# Start building.
queue_process.run()
# Start the storage saving.
storage_args = (namespace, repository, locations, synthetic_image_id, storage_queue_file)
storage_args = (derived.uuid, derived.locations, storage_queue_file)
QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args)
# Return the client's data.

View file

@@ -243,6 +243,8 @@ def initialize_database():
ImageStorageLocation.create(name='local_eu')
ImageStorageLocation.create(name='local_us')
ImageStorageTransformation.create(name='squash')
# NOTE: These MUST be copied over to NotificationKind, since every external
# notification can also generate a Quay.io notification.
ExternalNotificationEvent.create(name='repo_push')

View file

@@ -36,3 +36,4 @@ psycopg2
pyyaml
git+https://github.com/DevTable/aniso8601-fake.git
git+https://github.com/DevTable/anunidecode.git
gipc