Refactor the util directory to use subpackages.

Jake Moshenko 2015-08-03 15:49:10 -04:00
parent 974ccaa2e7
commit 18100be481
46 changed files with 36 additions and 39 deletions

util/migrate/__init__.py (new file)

@@ -0,0 +1,49 @@
import logging

from data.database import ImageStorage, Image, db
from app import app

LOGGER = logging.getLogger(__name__)


def backfill_aggregate_sizes():
  """ Generates aggregate sizes for any image storage entries without them. """
  LOGGER.setLevel(logging.DEBUG)
  LOGGER.debug('Aggregate sizes backfill: Began execution')

  while True:
    batch_storage_ids = list(ImageStorage
                             .select(ImageStorage.id)
                             .where(ImageStorage.aggregate_size >> None)
                             .limit(10))

    if len(batch_storage_ids) == 0:
      # There are no storages left to backfill. We're done!
      LOGGER.debug('Aggregate sizes backfill: Backfill completed')
      return

    LOGGER.debug('Aggregate sizes backfill: Found %s records to update', len(batch_storage_ids))
    for image_storage_id in batch_storage_ids:
      LOGGER.debug('Updating image storage: %s', image_storage_id.id)

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          storage = ImageStorage.select().where(ImageStorage.id == image_storage_id.id).get()
          image = Image.select().where(Image.storage == storage).get()

          # The aggregate size is the image's own layer size plus the layer size of every
          # ancestor image in its chain.
          image_ids = image.ancestors.split('/')[1:-1]
          aggregate_size = storage.image_size
          for image_id in image_ids:
            current_image = (Image
                             .select(Image, ImageStorage)
                             .join(ImageStorage)
                             .where(Image.id == image_id)
                             .get())
            aggregate_size += current_image.storage.image_size

          storage.aggregate_size = aggregate_size
          storage.save()
        except ImageStorage.DoesNotExist:
          pass
        except Image.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_aggregate_sizes()
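A note on the query idiom used throughout these backfills: in peewee, writing field >> None compiles to an SQL IS NULL comparison, so each pass of the loop selects only rows that still lack a value, and the loop stops once a batch comes back empty. Below is a minimal, self-contained sketch of the same batched-backfill pattern against a hypothetical Widget model and an in-memory SQLite database (illustrative only, not part of this commit):

from peewee import SqliteDatabase, Model, IntegerField

db = SqliteDatabase(':memory:')


class Widget(Model):
  # A made-up table with a nullable column that a backfill would populate.
  computed_total = IntegerField(null=True)

  class Meta:
    database = db


def backfill_totals(batch_size=10):
  while True:
    # 'computed_total >> None' is rendered as "computed_total IS NULL".
    batch = list(Widget.select(Widget.id).where(Widget.computed_total >> None).limit(batch_size))
    if not batch:
      return  # Nothing left to backfill.
    for row in batch:
      Widget.update(computed_total=0).where(Widget.id == row.id).execute()


if __name__ == '__main__':
  db.create_tables([Widget])
  backfill_totals()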


@@ -0,0 +1,54 @@
import logging
import uuid

from data.database import User, db
from app import app

LOGGER = logging.getLogger(__name__)


def backfill_user_uuids():
  """ Generates UUIDs for any Users without them. """
  LOGGER.setLevel(logging.DEBUG)
  LOGGER.debug('User UUID Backfill: Began execution')

  # Check to see if any users are missing uuids.
  has_missing_uuids = True
  try:
    User.select(User.id).where(User.uuid >> None).get()
  except User.DoesNotExist:
    has_missing_uuids = False

  if not has_missing_uuids:
    LOGGER.debug('User UUID Backfill: No migration needed')
    return

  LOGGER.debug('User UUID Backfill: Starting migration')
  while True:
    batch_user_ids = list(User
                          .select(User.id)
                          .where(User.uuid >> None)
                          .limit(100))

    if len(batch_user_ids) == 0:
      # There are no users left to backfill. We're done!
      LOGGER.debug('User UUID Backfill: Backfill completed')
      return

    LOGGER.debug('User UUID Backfill: Found %s records to update', len(batch_user_ids))
    for user_id in batch_user_ids:
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          user = User.select(User.id, User.uuid).where(User.id == user_id).get()
          user.uuid = str(uuid.uuid4())
          user.save(only=[User.uuid])
        except User.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_user_uuids()
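Both backfills above wrap each row update in app.config['DB_TRANSACTION_FACTORY'](db). The value of that factory is supplied by application configuration and does not appear in this diff; a plausible minimal definition, shown here purely as an assumption, is a thin wrapper around peewee's transaction context manager:

# Hypothetical config entry; the real value lives in the application's config, not in this diff.
def create_transaction(db):
  # peewee's db.transaction() returns a context manager that commits on success
  # and rolls back if the block raises.
  return db.transaction()

DB_TRANSACTION_FACTORY = create_transaction

Under that assumption, the with-blocks above behave like "with db.transaction():" and keep each read-modify-write atomic.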


@@ -0,0 +1,48 @@
import logging
import time

from datetime import datetime, timedelta

from data.database import RepositoryBuild, AccessToken
from app import app

logger = logging.getLogger(__name__)

BATCH_SIZE = 1000


def delete_temporary_access_tokens(older_than):
  # Find the highest ID up to which we should delete.
  up_to_id = (AccessToken
              .select(AccessToken.id)
              .where(AccessToken.created < older_than)
              .limit(1)
              .order_by(AccessToken.id.desc())
              .get().id)
  logger.debug('Deleting temporary access tokens with ids lower than: %s', up_to_id)

  # Tokens still referenced by a build must be kept.
  access_tokens_in_builds = (RepositoryBuild.select(RepositoryBuild.access_token).distinct())

  while up_to_id > 0:
    starting_at_id = max(up_to_id - BATCH_SIZE, 0)
    logger.debug('Deleting tokens with ids between %s and %s', starting_at_id, up_to_id)

    start_time = datetime.utcnow()
    (AccessToken
     .delete()
     .where(AccessToken.id >= starting_at_id,
            AccessToken.id < up_to_id,
            AccessToken.temporary == True,
            ~(AccessToken.id << access_tokens_in_builds))  # '<<' is peewee's IN operator.
     .execute())
    time_to_delete = datetime.utcnow() - start_time

    up_to_id -= BATCH_SIZE

    # Throttle: sleep for as long as the delete took before starting the next batch.
    logger.debug('Sleeping for %s seconds', time_to_delete.total_seconds())
    time.sleep(time_to_delete.total_seconds())


if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  delete_temporary_access_tokens(datetime.utcnow() - timedelta(days=2))
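The delete loop above throttles itself: after each batch it sleeps for as long as the DELETE took, so the cleanup spends at most roughly half its wall-clock time holding the database's attention. The same idea in isolation, as a stand-alone sketch with hypothetical helper names (not part of this commit):

import time
from datetime import datetime


def run_throttled(batches, do_batch):
  """ Runs do_batch over each batch, sleeping as long as each batch took to execute. """
  for batch in batches:
    start_time = datetime.utcnow()
    do_batch(batch)
    elapsed = (datetime.utcnow() - start_time).total_seconds()
    time.sleep(elapsed)


if __name__ == '__main__':
  # Example: three fake batches that each take a tenth of a second of "work".
  run_throttled(range(3), lambda _: time.sleep(0.1))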


@@ -0,0 +1,90 @@
import logging
import json

from app import app
from data.database import configure, RepositoryBuildTrigger, BuildTriggerService
from bitbucket import BitBucket
from endpoints.trigger import BitbucketBuildTrigger

configure(app.config)

logger = logging.getLogger(__name__)


def run_bitbucket_migration():
  bitbucket_trigger = BuildTriggerService.get(BuildTriggerService.name == "bitbucket")

  encountered = set()
  while True:
    # '**' is peewee's case-insensitive LIKE, so this finds triggers whose JSON config still
    # contains a "hook_id" key (i.e. old-style service hooks).
    found = list(RepositoryBuildTrigger.select().where(
        RepositoryBuildTrigger.service == bitbucket_trigger,
        RepositoryBuildTrigger.config ** "%\"hook_id%"))

    found = [f for f in found if f.uuid not in encountered]
    if not found:
      logger.debug('No additional records found')
      return

    logger.debug('Found %s records to be changed', len(found))
    for trigger in found:
      encountered.add(trigger.uuid)

      try:
        config = json.loads(trigger.config)
      except ValueError:
        logger.error("Cannot parse config for trigger %s", trigger.uuid)
        continue

      logger.debug("Checking trigger %s", trigger.uuid)
      if 'hook_id' in config:
        logger.debug("Updating trigger %s to a webhook", trigger.uuid)

        trigger_handler = BitbucketBuildTrigger(trigger)
        client = trigger_handler._get_repository_client()

        hook_id = config['hook_id']

        # Lookup the old service hook.
        logger.debug("Looking up old service URL for trigger %s", trigger.uuid)
        (result, hook_data, err_msg) = client.services().get(hook_id)
        if not result or not hook_data:
          logger.error('Error when retrieving service hook for trigger %s: %s',
                       trigger.uuid, err_msg)
          continue

        if 'webhook_id' not in config:
          hook_data = hook_data[0]['service']
          webhook_url = [f for f in hook_data['fields'] if f['name'] == 'URL'][0]['value']
          logger.debug("Adding webhook for trigger %s: %s", trigger.uuid, webhook_url)

          # Add the new web hook.
          description = 'Webhook for invoking builds on %s' % app.config['REGISTRY_TITLE_SHORT']
          webhook_events = ['repo:push']
          (result, data, err_msg) = client.webhooks().create(description, webhook_url,
                                                             webhook_events)
          if not result:
            logger.error('Error when adding webhook for trigger %s: %s', trigger.uuid, err_msg)
            continue

          config['webhook_id'] = data['uuid']
          trigger.config = json.dumps(config)
          trigger.save()

        # Remove the old service hook.
        logger.debug("Deleting old service URL for trigger %s", trigger.uuid)
        (result, _, err_msg) = client.services().delete(hook_id)
        if not result:
          logger.error('Error when deleting service hook for trigger %s: %s', trigger.uuid, err_msg)
          continue

        del config['hook_id']

        # Update the config.
        trigger.config = json.dumps(config)
        trigger.save()

        logger.debug("Trigger %s updated to a webhook", trigger.uuid)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  run_bitbucket_migration()


@@ -0,0 +1,52 @@
import logging
import json

from app import app
from data.database import configure, RepositoryNotification, ExternalNotificationMethod

configure(app.config)

logger = logging.getLogger(__name__)


def run_slackwebhook_migration():
  slack_method = ExternalNotificationMethod.get(ExternalNotificationMethod.name == "slack")

  encountered = set()
  while True:
    # Find Slack notifications that still use the old subdomain/token config and have not yet
    # been given a webhook URL ('**' is peewee's case-insensitive LIKE).
    found = list(RepositoryNotification.select().where(
        RepositoryNotification.method == slack_method,
        RepositoryNotification.config_json ** "%subdomain%",
        ~(RepositoryNotification.config_json ** "%url%")))

    found = [f for f in found if f.uuid not in encountered]
    if not found:
      logger.debug('No additional records found')
      return

    logger.debug('Found %s records to be changed', len(found))
    for notification in found:
      encountered.add(notification.uuid)

      try:
        config = json.loads(notification.config_json)
      except ValueError:
        logger.error("Cannot parse config for notification %s", notification.uuid)
        continue

      logger.debug("Checking notification %s", notification.uuid)
      if 'subdomain' in config and 'token' in config:
        subdomain = config['subdomain']
        token = config['token']
        new_url = 'https://%s.slack.com/services/hooks/incoming-webhook?token=%s' % (subdomain,
                                                                                     token)
        config['url'] = new_url

        logger.debug("Updating notification %s to URL: %s", notification.uuid, new_url)
        notification.config_json = json.dumps(config)
        notification.save()


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  run_slackwebhook_migration()


@@ -0,0 +1,105 @@
import logging
import zlib

from data import model
from data.database import ImageStorage, db, db_random_func
from app import app, storage as store
from util.registry.gzipstream import ZLIB_GZIP_WINDOW

logger = logging.getLogger(__name__)

CHUNK_SIZE = 5 * 1024 * 1024


def backfill_sizes_from_data():
  logger.setLevel(logging.DEBUG)

  logger.debug('Starting uncompressed image size backfill')
  logger.debug('NOTE: This can be a LONG RUNNING OPERATION. Please wait!')

  # Check for any uncompressed images.
  has_images = bool(list(ImageStorage
                         .select(ImageStorage.uuid)
                         .where(ImageStorage.uncompressed_size >> None,
                                ImageStorage.image_size > 0,
                                ImageStorage.uploading == False)
                         .limit(1)))

  if not has_images:
    logger.debug('Uncompressed backfill: No migration needed')
    return

  logger.debug('Uncompressed backfill: Starting migration')
  encountered = set()
  while True:
    # Load a random batch of candidate records from the DB.
    batch_ids = list(ImageStorage
                     .select(ImageStorage.uuid)
                     .where(ImageStorage.uncompressed_size >> None,
                            ImageStorage.image_size > 0,
                            ImageStorage.uploading == False)
                     .limit(100)
                     .order_by(db_random_func()))

    batch_ids = set([s.uuid for s in batch_ids]) - encountered
    logger.debug('Found %s images to process', len(batch_ids))
    if len(batch_ids) == 0:
      # We're done!
      return

    counter = 1
    for uuid in batch_ids:
      encountered.add(uuid)

      logger.debug('Processing image ID %s (%s/%s)', uuid, counter, len(batch_ids))
      counter = counter + 1

      try:
        with_locs = model.storage.get_storage_by_uuid(uuid)
        if with_locs.uncompressed_size is not None:
          logger.debug('Somebody else already filled this in for us: %s', uuid)
          continue

        # Read the layer from backing storage and calculate the uncompressed size.
        logger.debug('Loading data: %s (%s bytes)', uuid, with_locs.image_size)
        decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

        uncompressed_size = 0
        with store.stream_read_file(with_locs.locations, store.image_layer_path(uuid)) as stream:
          while True:
            current_data = stream.read(CHUNK_SIZE)
            if len(current_data) == 0:
              break

            # Decompress in bounded chunks; unconsumed_tail holds any input not yet processed.
            while current_data:
              uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
              current_data = decompressor.unconsumed_tail

        # Write the size to the image storage. We do so under a transaction AFTER checking to
        # make sure the image storage still exists and has not changed.
        logger.debug('Writing entry: %s. Size: %s', uuid, uncompressed_size)
        with app.config['DB_TRANSACTION_FACTORY'](db):
          current_record = model.storage.get_storage_by_uuid(uuid)

          if not current_record.uploading and current_record.uncompressed_size is None:
            current_record.uncompressed_size = uncompressed_size
            current_record.save()
          else:
            logger.debug('Somebody else already filled this in for us, after we did the work: %s',
                         uuid)

      except model.InvalidImageException:
        logger.warning('Storage with uuid no longer exists: %s', uuid)
      except IOError:
        logger.warning('IOError on %s', uuid)
      except MemoryError:
        logger.warning('MemoryError on %s', uuid)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_sizes_from_data()
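The core trick in this last backfill is computing a layer's uncompressed size without ever materializing the decompressed data: the gzip'd bytes are streamed through a zlib decompressor in bounded chunks and only the output lengths are summed. The same calculation in isolation, as a sketch; zlib.MAX_WBITS | 32 is used here so zlib auto-detects the gzip header, playing the role that ZLIB_GZIP_WINDOW plays above:

import zlib

CHUNK_SIZE = 5 * 1024 * 1024  # Matches the 5 MB chunk size used above.


def uncompressed_length(fileobj):
  """ Returns the uncompressed size of a gzip stream without buffering its contents. """
  decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
  total = 0
  while True:
    compressed = fileobj.read(CHUNK_SIZE)
    if not compressed:
      return total
    while compressed:
      # Cap each decompress call's output; leftover input is kept in unconsumed_tail.
      total += len(decompressor.decompress(compressed, CHUNK_SIZE))
      compressed = decompressor.unconsumed_tail


if __name__ == '__main__':
  import gzip
  import io

  buf = io.BytesIO()
  with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
    gz.write(b'x' * 1000000)
  buf.seek(0)
  print(uncompressed_length(buf))  # 1000000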