Merge remote-tracking branch 'upstream/v2-phase4' into python-registry-v2

This commit is contained in:
Jake Moshenko 2015-10-22 16:59:28 -04:00
commit e7a6176594
105 changed files with 4439 additions and 2074 deletions

View file

@ -1,44 +1,50 @@
import logging
from data.database import ImageStorage, Image, db
from data.database import ImageStorage, Image, db, db_for_update
from app import app
LOGGER = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
def backfill_aggregate_sizes():
    """ Generates aggregate sizes for any images without them.

    An image's aggregate size is the image_size of its own storage plus the
    image_size of the storage behind every ancestor id listed in its
    '/'-separated ancestors string.

    Candidate images are walked in ascending id order and each one is attempted
    at most once per run. An image that cannot be resolved (it, or one of its
    ancestors, raises Image.DoesNotExist) is logged and skipped; without the id
    cursor it would keep its NULL aggregate_size, be re-selected by every
    following batch, and the loop would never terminate.
    """
    logger.debug('Aggregate sizes backfill: Began execution')

    # Highest image id already handed to the update loop; None until the first batch.
    last_seen_id = None
    while True:
        batch_query = Image.select(Image.id).where(Image.aggregate_size >> None)
        if last_seen_id is not None:
            batch_query = batch_query.where(Image.id > last_seen_id)
        batch_image_ids = list(batch_query.order_by(Image.id).limit(100))

        if not batch_image_ids:
            # There are no images left to backfill. We're done!
            logger.debug('Aggregate sizes backfill: Backfill completed')
            return

        logger.debug('Aggregate sizes backfill: Found %s records to update', len(batch_image_ids))
        last_seen_id = batch_image_ids[-1].id

        for image_row in batch_image_ids:
            logger.debug('Updating image : %s', image_row.id)

            # One transaction per image so a failure only affects that image.
            with app.config['DB_TRANSACTION_FACTORY'](db):
                try:
                    image = (Image
                             .select(Image, ImageStorage)
                             .join(ImageStorage)
                             .where(Image.id == image_row.id)
                             .get())

                    aggregate_size = image.storage.image_size

                    # Drop the empty strings produced by the leading and trailing '/'.
                    ancestor_ids = image.ancestors.split('/')[1:-1]
                    for ancestor_id in ancestor_ids:
                        to_add = db_for_update(Image
                                               .select(Image, ImageStorage)
                                               .join(ImageStorage)
                                               .where(Image.id == ancestor_id)).get()
                        aggregate_size += to_add.storage.image_size

                    image.aggregate_size = aggregate_size
                    image.save()
                except Image.DoesNotExist:
                    logger.warning('Aggregate sizes backfill: Could not load image %s or one of '
                                   'its ancestors, skipping', image_row.id)

View file

@ -0,0 +1,87 @@
import logging
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
TextField)
from data.database import BaseModel, db, db_for_update
from app import app
logger = logging.getLogger(__name__)
class Repository(BaseModel):
    """ Field-less stand-in model; it exists here so Image.repository has a target to reference. """
# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
    """ Migration-local copy of the image storage columns this backfill reads. """

    # Metadata columns that the backfill copies onto the owning image.
    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)

    # The backfill only considers storages whose uploading flag is False.
    uploading = BooleanField(null=True, default=True)
class Image(BaseModel):
    """ Migration-local copy of the image table, including the columns this backfill fills in. """

    # This class is intentionally denormalized. Even though images are supposed
    # to be globally unique we can't treat them as such for permissions and
    # security reasons. So rather than Repository <-> Image being many to many
    # each image now belongs to exactly one repository.
    docker_image_id = CharField(index=True)
    repository = ForeignKeyField(Repository)

    # '/' separated list of ancestor ids, e.g. /1/2/6/7/10/
    ancestors = CharField(null=True, index=True, default='/', max_length=64535)

    storage = ForeignKeyField(ImageStorage, null=True, index=True)

    # Destination columns for the metadata copied over from the storage row.
    created = DateTimeField(null=True)
    comment = TextField(null=True)
    command = TextField(null=True)
    aggregate_size = BigIntegerField(null=True)

    v1_json_metadata = TextField(null=True)
def backfill_image_fields():
    """ Copies created, comment, command and aggregate_size from image storages to their images.

    Works in batches of up to 100 images whose four metadata columns are all
    NULL, whose storage is not flagged as uploading, and whose storage has at
    least one of the four columns set. Returns once a batch comes back empty.
    """
    logger.debug('Image metadata backfill: Began execution')

    # True for storages that have nothing to copy; negated in the batch filter below.
    storage_has_no_metadata = ((ImageStorage.created >> None) &
                               (ImageStorage.comment >> None) &
                               (ImageStorage.command >> None) &
                               (ImageStorage.aggregate_size >> None))

    while True:
        # The query is rebuilt on every pass so each batch reflects the rows updated so far.
        pending = list(Image
                       .select(Image.id)
                       .join(ImageStorage)
                       .where(Image.created >> None, Image.comment >> None,
                              Image.command >> None, Image.aggregate_size >> None,
                              ImageStorage.uploading == False,
                              ~storage_has_no_metadata)
                       .limit(100))

        if not pending:
            logger.debug('Image metadata backfill: Backfill completed')
            return

        logger.debug('Image metadata backfill: Found %s records to update', len(pending))
        for candidate in pending:
            logger.debug('Updating image: %s', candidate.id)

            # Each image is updated inside its own transaction.
            with app.config['DB_TRANSACTION_FACTORY'](db):
                try:
                    locked_query = db_for_update(Image
                                                 .select(Image, ImageStorage)
                                                 .join(ImageStorage)
                                                 .where(Image.id == candidate.id))
                    image = locked_query.get()

                    source = image.storage
                    image.created = source.created
                    image.comment = source.comment
                    image.command = source.command
                    image.aggregate_size = source.aggregate_size
                    image.save()
                except Image.DoesNotExist:
                    # The row no longer matches the join; nothing to update.
                    pass
if __name__ == "__main__":
    # Script entry point: debug-level logging overall, with the peewee logger
    # raised to CRITICAL, then run the backfill.
    logging.basicConfig(level=logging.DEBUG)
    peewee_logger = logging.getLogger('peewee')
    peewee_logger.setLevel(logging.CRITICAL)
    backfill_image_fields()

View file

@ -0,0 +1,72 @@
import logging
from peewee import JOIN_LEFT_OUTER
from data.database import (Image, ImageStorage, ImageStoragePlacement, ImageStorageLocation, db,
db_for_update)
from app import app, storage
from data import model
logger = logging.getLogger(__name__)
def image_json_path(storage_uuid):
    """ Returns storage.image_path(storage_uuid) with the literal suffix 'json' appended. """
    return '{0}json'.format(storage.image_path(storage_uuid))
def backfill_v1_metadata():
    """ Loads each image's v1 JSON metadata from storage and saves it on the image row.

    Works in batches of up to 100 images whose v1_json_metadata is NULL and
    whose storage is not flagged as uploading. Batches are walked in ascending
    id order and each image is attempted at most once per run: when the
    metadata cannot be read, NULL is written back, so without the id cursor the
    same image would be re-selected by every following batch and the loop would
    never terminate.

    Returns when no candidates remain, or early (after logging an error) if the
    per-image lookup unexpectedly yields more than one image.
    """
    logger.debug('Image v1 metadata backfill: Began execution')

    # Highest image id already handed to the update loop; None until the first batch.
    last_seen_id = None
    while True:
        batch_query = (Image
                       .select(Image.id)
                       .join(ImageStorage)
                       .where(Image.v1_json_metadata >> None, ImageStorage.uploading == False))
        if last_seen_id is not None:
            batch_query = batch_query.where(Image.id > last_seen_id)
        batch_image_ids = list(batch_query.order_by(Image.id).limit(100))

        if not batch_image_ids:
            logger.debug('Image v1 metadata backfill: Backfill completed')
            return

        logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
        last_seen_id = batch_image_ids[-1].id

        for one_id in batch_image_ids:
            # One transaction per image so a failure only affects that image.
            with app.config['DB_TRANSACTION_FACTORY'](db):
                try:
                    logger.debug('Loading image: %s', one_id.id)

                    raw_query = (ImageStoragePlacement
                                 .select(ImageStoragePlacement, Image, ImageStorage,
                                         ImageStorageLocation)
                                 .join(ImageStorageLocation)
                                 .switch(ImageStoragePlacement)
                                 .join(ImageStorage, JOIN_LEFT_OUTER)
                                 .join(Image)
                                 .where(Image.id == one_id.id))

                    placement_query = db_for_update(raw_query)

                    # presumably collapses placement rows into one image per id with its
                    # storage locations attached -- helper is defined elsewhere; TODO confirm
                    repo_image_list = model.image.invert_placement_query_results(placement_query)
                    if len(repo_image_list) > 1:
                        logger.error('Found more images than we requested, something is wrong with the query')
                        return

                    if not repo_image_list:
                        # Nothing came back for this image; indexing [0] would raise IndexError.
                        logger.warning('No storage placements found for image: %s, skipping',
                                       one_id.id)
                        continue

                    repo_image = repo_image_list[0]
                    uuid = repo_image.storage.uuid
                    json_path = image_json_path(uuid)

                    logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
                    try:
                        data = storage.get_content(repo_image.storage.locations, json_path)
                    except IOError:
                        data = None
                        logger.exception('failed to find v1 metadata, defaulting to None')

                    repo_image.v1_json_metadata = data
                    repo_image.save()
                except ImageStoragePlacement.DoesNotExist:
                    pass
if __name__ == "__main__":
    # Script entry point: debug-level logging, then run the backfill.
    root_log_level = logging.DEBUG
    logging.basicConfig(level=root_log_level)
    # NOTE(review): the peewee logger override below is commented out, so query
    # logging stays at the root level here -- confirm this is intentional.
    # logging.getLogger('peewee').setLevel(logging.CRITICAL)
    backfill_v1_metadata()

View file

@ -5,7 +5,7 @@ from app import app
from data.database import configure, BaseModel, uuid_generator
from peewee import *
from bitbucket import BitBucket
from endpoints.trigger import BitbucketBuildTrigger
from buildtrigger.bitbuckethandler import BitbucketBuildTrigger
configure(app.config)

View file

@ -4,7 +4,7 @@ import json
from data.database import RepositoryBuildTrigger, BuildTriggerService, db, db_for_update
from app import app
from endpoints.trigger import BuildTriggerHandler
from buildtrigger.basehandler import BuildTriggerHandler
from util.security.ssh import generate_ssh_keypair
from github import GithubException
@ -24,7 +24,8 @@ def backfill_github_deploykeys():
.select(RepositoryBuildTrigger.id)
.where(RepositoryBuildTrigger.private_key >> None)
.where(RepositoryBuildTrigger.service == github_service)
.limit(10))
.where(RepositoryBuildTrigger.used_legacy_github >> None)
.limit(100))
filtered_ids = [trigger.id for trigger in build_trigger_ids if trigger.id not in encountered]
if len(filtered_ids) == 0:
@ -39,15 +40,22 @@ def backfill_github_deploykeys():
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
query = RepositoryBuildTrigger.select(RepositoryBuildTrigger.id == trigger_id)
query = RepositoryBuildTrigger.select().where(RepositoryBuildTrigger.id == trigger_id)
trigger = db_for_update(query).get()
except RepositoryBuildTrigger.DoesNotExist:
logger.debug('Could not find build trigger %s', trigger_id)
continue
trigger.used_legacy_github = True
trigger.save()
handler = BuildTriggerHandler.get_handler(trigger)
config = handler.config
if not 'build_source' in config:
logger.debug('Could not find build source for trigger %s', trigger_id)
continue
build_source = config['build_source']
gh_client = handler._get_client()
@ -83,5 +91,8 @@ def backfill_github_deploykeys():
if __name__ == "__main__":
logging.getLogger('boto').setLevel(logging.CRITICAL)
logging.getLogger('github').setLevel(logging.CRITICAL)
logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
backfill_github_deploykeys()

View file

@ -67,7 +67,7 @@ def backfill_sizes_from_data():
decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
uncompressed_size = 0
with store.stream_read_file(with_locs.locations, store.image_layer_path(uuid)) as stream:
with store.stream_read_file(with_locs.locations, store.v1_image_layer_path(uuid)) as stream:
while True:
current_data = stream.read(CHUNK_SIZE)
if len(current_data) == 0: