Collapse all migrations prior to 2.0.0 into one.

Jake Moshenko 2016-11-10 14:56:56 -05:00
parent 6de039dc97
commit b5834a8a66
105 changed files with 957 additions and 4692 deletions


@@ -1,55 +0,0 @@
import logging

from data.database import ImageStorage, Image, db, db_for_update
from app import app

logger = logging.getLogger(__name__)


def backfill_aggregate_sizes():
  """ Generates aggregate sizes for any image storage entries without them. """
  logger.debug('Aggregate sizes backfill: Began execution')
  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .where(Image.aggregate_size >> None)
                           .limit(100))

    if len(batch_image_ids) == 0:
      # There are no images left to backfill. We're done!
      logger.debug('Aggregate sizes backfill: Backfill completed')
      return

    logger.debug('Aggregate sizes backfill: Found %s records to update', len(batch_image_ids))
    for image_id in batch_image_ids:
      logger.debug('Updating image: %s', image_id.id)

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          image = (Image
                   .select(Image, ImageStorage)
                   .join(ImageStorage)
                   .where(Image.id == image_id)
                   .get())

          aggregate_size = image.storage.image_size

          # Sum the image's own layer size with the sizes of all of its ancestors.
          for ancestor_id in image.ancestor_id_list():
            to_add = db_for_update(Image
                                   .select(Image, ImageStorage)
                                   .join(ImageStorage)
                                   .where(Image.id == ancestor_id)).get()
            aggregate_size += to_add.storage.image_size

          image.aggregate_size = aggregate_size
          image.save()
        except Image.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_aggregate_sizes()
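
For reference, the aggregate size being backfilled is the image's own layer size plus the layer size of every ancestor, where ancestors are encoded as a '/'-separated id string (e.g. '/1/2/6/7/10/'). A minimal sketch of that arithmetic, with a hypothetical layer_sizes dict standing in for the ImageStorage rows:

def parse_ancestor_ids(ancestors):
  # '/1/2/6/7/10/' -> [1, 2, 6, 7, 10]; a bare '/' means no ancestors.
  return [int(part) for part in ancestors.split('/') if part]

def aggregate_size(own_size, ancestors, layer_sizes):
  # layer_sizes is an illustrative {image_id: compressed_layer_size} lookup.
  return own_size + sum(layer_sizes[aid] for aid in parse_ancestor_ids(ancestors))

assert parse_ancestor_ids('/1/2/6/7/10/') == [1, 2, 6, 7, 10]
assert aggregate_size(5, '/1/', {1: 10}) == 15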


@@ -1,145 +0,0 @@
import logging

from peewee import (JOIN_LEFT_OUTER, CharField, BigIntegerField, BooleanField, ForeignKeyField,
                    IntegerField, IntegrityError, fn)

from data.database import BaseModel, CloseForLongOperation
from data.fields import Base64BinaryField
from app import app, storage
from digest import checksums
from util.migrate.allocator import yield_random_entries
from util.registry.torrent import PieceHasher
from util.registry.filelike import wrap_with_handler

BATCH_SIZE = 1000

logger = logging.getLogger(__name__)


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)
  cas_path = BooleanField(default=True)
  content_checksum = CharField(null=True, index=True)


class ImageStorageLocation(BaseModel):
  name = CharField(unique=True, index=True)


class ImageStoragePlacement(BaseModel):
  storage = ForeignKeyField(ImageStorage)
  location = ForeignKeyField(ImageStorageLocation)


class TorrentInfo(BaseModel):
  storage = ForeignKeyField(ImageStorage)
  piece_length = IntegerField()
  pieces = Base64BinaryField()


def _get_image_storage_locations(storage_id):
  placements_query = (ImageStoragePlacement
                      .select(ImageStoragePlacement, ImageStorageLocation)
                      .join(ImageStorageLocation)
                      .switch(ImageStoragePlacement)
                      .join(ImageStorage, JOIN_LEFT_OUTER)
                      .where(ImageStorage.id == storage_id))

  locations = set()
  for placement in placements_query:
    locations.add(placement.location.name)

  return locations


def _get_layer_path(storage_record):
  """ Returns the path in the storage engine to the layer data referenced by the storage row. """
  if not storage_record.cas_path:
    logger.debug('Serving layer from legacy v1 path: %s', storage_record.uuid)
    return storage.v1_image_layer_path(storage_record.uuid)
  return storage.blob_path(storage_record.content_checksum)


def backfill_content_checksums_and_torrent_pieces(piece_length):
  """ Hashes the entire file for the content associated with an imagestorage. """
  logger.debug('Began execution')
  logger.debug('This may be a long operation!')

  def batch_query():
    return (ImageStorage
            .select(ImageStorage.id, ImageStorage.uuid, ImageStorage.content_checksum,
                    ImageStorage.cas_path)
            .join(TorrentInfo, JOIN_LEFT_OUTER, on=((TorrentInfo.storage == ImageStorage.id) &
                                                    (TorrentInfo.piece_length == piece_length)))
            .where((TorrentInfo.id >> None) | (ImageStorage.content_checksum >> None)))

  max_id = ImageStorage.select(fn.Max(ImageStorage.id)).scalar()

  checksums_written = 0
  pieces_written = 0
  for candidate_storage, abort in yield_random_entries(batch_query, ImageStorage.id, BATCH_SIZE,
                                                       max_id):
    locations = _get_image_storage_locations(candidate_storage.id)

    checksum = candidate_storage.content_checksum
    torrent_pieces = ''
    with CloseForLongOperation(app.config):
      try:
        # Compute the checksum
        layer_path = _get_layer_path(candidate_storage)
        with storage.stream_read_file(locations, layer_path) as layer_data_handle:
          hasher = PieceHasher(piece_length)
          wrapped = wrap_with_handler(layer_data_handle, hasher.update)
          checksum = 'sha256:{0}'.format(checksums.sha256_file(wrapped))
          torrent_pieces = hasher.final_piece_hashes()
      except Exception as exc:
        logger.exception('Unable to compute hashes for storage: %s', candidate_storage.uuid)

        # Create a fallback value for the checksum
        if checksum is None:
          checksum = 'unknown:{0}'.format(exc.__class__.__name__)

    torrent_collision = False
    checksum_collision = False

    # Now update the ImageStorage with the checksum
    num_updated = (ImageStorage
                   .update(content_checksum=checksum)
                   .where(ImageStorage.id == candidate_storage.id,
                          ImageStorage.content_checksum >> None)).execute()
    checksums_written += num_updated
    if num_updated == 0:
      checksum_collision = True

    try:
      TorrentInfo.create(storage=candidate_storage.id, piece_length=piece_length,
                         pieces=torrent_pieces)
      pieces_written += 1
    except IntegrityError:
      torrent_collision = True

    if torrent_collision and checksum_collision:
      logger.info('Another worker pre-empted us for storage: %s', candidate_storage.uuid)
      abort.set()

    if (pieces_written % BATCH_SIZE) == 0 or (checksums_written % BATCH_SIZE) == 0:
      logger.debug('%s checksums written, %s torrent pieces written', checksums_written,
                   pieces_written)

  logger.debug('Completed, %s checksums written, %s torrent pieces written', checksums_written,
               pieces_written)


if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  # logging.getLogger('peewee').setLevel(logging.WARNING)
  logging.getLogger('boto').setLevel(logging.WARNING)
  logging.getLogger('data.database').setLevel(logging.WARNING)
  backfill_content_checksums_and_torrent_pieces(app.config['BITTORRENT_PIECE_SIZE'])
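
The script above hashes each layer in a single pass: the storage stream is wrapped so that every chunk read for the SHA-256 digest is also fed to the torrent PieceHasher. A self-contained sketch of that wrapping idea (the real wrap_with_handler and PieceHasher live in util.registry; the stand-ins here only illustrate the one-pass pattern):

import io
import hashlib

class HandlerWrappedFile(object):
  """ File-like proxy that invokes `handler` on every chunk read. """
  def __init__(self, fileobj, handler):
    self._fileobj = fileobj
    self._handler = handler

  def read(self, size=-1):
    chunk = self._fileobj.read(size)
    if chunk:
      self._handler(chunk)
    return chunk

def sha256_file(fileobj, chunk_size=65536):
  hasher = hashlib.sha256()
  while True:
    chunk = fileobj.read(chunk_size)
    if not chunk:
      return hasher.hexdigest()
    hasher.update(chunk)

pieces = hashlib.sha1()  # stand-in for PieceHasher (torrent pieces are SHA-1 based)
stream = HandlerWrappedFile(io.BytesIO(b'layer-bytes'), pieces.update)
digest = 'sha256:{0}'.format(sha256_file(stream))

Reading the wrapped stream once then yields both digests, so the layer is pulled from backing storage a single time.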


@@ -1,87 +0,0 @@
import logging

from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField)

from data.database import BaseModel, db, db_for_update
from app import app

logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
  # This class is intentionally denormalized. Even though images are supposed
  # to be globally unique, we can't treat them as such for permissions and
  # security reasons. So rather than Repository <-> Image being many-to-many,
  # each image now belongs to exactly one repository.
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)

  # '/'-separated list of ancestry ids, e.g. /1/2/6/7/10/
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)

  storage = ForeignKeyField(ImageStorage, index=True, null=True)

  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)


def backfill_image_fields():
  """ Copies metadata from image storages to their images. """
  logger.debug('Image metadata backfill: Began execution')
  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .join(ImageStorage)
                           .where(Image.created >> None, Image.comment >> None,
                                  Image.command >> None, Image.aggregate_size >> None,
                                  ImageStorage.uploading == False,
                                  ~((ImageStorage.created >> None) &
                                    (ImageStorage.comment >> None) &
                                    (ImageStorage.command >> None) &
                                    (ImageStorage.aggregate_size >> None)))
                           .limit(100))

    if len(batch_image_ids) == 0:
      logger.debug('Image metadata backfill: Backfill completed')
      return

    logger.debug('Image metadata backfill: Found %s records to update', len(batch_image_ids))
    for image_id in batch_image_ids:
      logger.debug('Updating image: %s', image_id.id)
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          image = db_for_update(Image
                                .select(Image, ImageStorage)
                                .join(ImageStorage)
                                .where(Image.id == image_id.id)).get()

          image.created = image.storage.created
          image.comment = image.storage.comment
          image.command = image.storage.command
          image.aggregate_size = image.storage.aggregate_size
          image.save()
        except Image.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_image_fields()
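
The where clause above leans on two pieces of peewee shorthand: field >> None compiles to field IS NULL, and ~(...) negates a clause, so it selects images whose own metadata columns are all NULL while at least one storage column is populated. A runnable illustration against a throwaway in-memory model (not one of the migration's tables):

from peewee import SqliteDatabase, Model, CharField

db = SqliteDatabase(':memory:')

class Item(Model):
  comment = CharField(null=True)

  class Meta:
    database = db

db.create_tables([Item])
Item.create(comment=None)
Item.create(comment='filled')

# `>> None` is peewee's IS NULL; `~` wraps the clause in NOT.
assert Item.select().where(Item.comment >> None).count() == 1
assert Item.select().where(~(Item.comment >> None)).count() == 1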


@@ -1,82 +0,0 @@
import logging

from peewee import (fn, CharField, BigIntegerField, ForeignKeyField, BooleanField, DateTimeField,
                    TextField, IntegerField)

from data.database import BaseModel
from app import app
from util.migrate.allocator import yield_random_entries

BATCH_SIZE = 1000


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)
  cas_path = BooleanField(default=True)
  content_checksum = CharField(null=True, index=True)


class Image(BaseModel):
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)
  storage = ForeignKeyField(ImageStorage, index=True, null=True)
  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)
  v1_checksum = CharField(null=True)
  security_indexed = BooleanField(default=False)
  security_indexed_engine = IntegerField(default=-1)
  parent_id = IntegerField(index=True, null=True)


logger = logging.getLogger(__name__)


def backfill_parent_id():
  logger.setLevel(logging.DEBUG)
  logger.debug('backfill_parent_id: Starting')
  logger.debug('backfill_parent_id: This can be a LONG RUNNING OPERATION. Please wait!')

  def fetch_batch():
    return (Image
            .select(Image.id, Image.ancestors)
            .join(ImageStorage)
            .where(Image.parent_id >> None, Image.ancestors != '/',
                   ImageStorage.uploading == False))

  max_id = Image.select(fn.Max(Image.id)).scalar()

  written = 0
  for to_backfill, abort in yield_random_entries(fetch_batch, Image.id, BATCH_SIZE, max_id):
    # The parent is the second-to-last entry in the '/'-separated ancestors list.
    computed_parent = int(to_backfill.ancestors.split('/')[-2])
    num_changed = (Image
                   .update(parent_id=computed_parent)
                   .where(Image.id == to_backfill.id, Image.parent_id >> None)).execute()
    if num_changed == 0:
      logger.info('Collision with another worker, aborting batch')
      abort.set()
    written += num_changed
    if (written % BATCH_SIZE) == 0:
      logger.debug('%s entries written', written)

  logger.debug('backfill_parent_id: Completed, updated %s entries', written)


if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_parent_id()
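
Concurrent workers coordinate without explicit locks: the UPDATE is guarded by parent_id IS NULL, so only the first writer succeeds, and a row count of zero tells a losing worker to abort its batch. A minimal sketch of that compare-and-set pattern using sqlite3 (table and values illustrative):

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE image (id INTEGER PRIMARY KEY, parent_id INTEGER)')
conn.execute('INSERT INTO image (id, parent_id) VALUES (1, NULL)')

def claim_parent(conn, image_id, parent_id):
  # The IS NULL guard makes the write a compare-and-set: only the first
  # worker to reach the row updates it; everyone else sees rowcount 0.
  cur = conn.execute('UPDATE image SET parent_id = ? WHERE id = ? AND parent_id IS NULL',
                     (parent_id, image_id))
  return cur.rowcount

assert claim_parent(conn, 1, 7) == 1   # first worker wins
assert claim_parent(conn, 1, 7) == 0   # a second worker detects the collision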


@@ -1,54 +0,0 @@
import logging
import uuid

from data.database import User, db
from app import app

LOGGER = logging.getLogger(__name__)


def backfill_user_uuids():
  """ Generates UUIDs for any Users without them. """
  LOGGER.setLevel(logging.DEBUG)
  LOGGER.debug('User UUID Backfill: Began execution')

  # Check to see if any users are missing uuids.
  has_missing_uuids = True
  try:
    User.select(User.id).where(User.uuid >> None).get()
  except User.DoesNotExist:
    has_missing_uuids = False

  if not has_missing_uuids:
    LOGGER.debug('User UUID Backfill: No migration needed')
    return

  LOGGER.debug('User UUID Backfill: Starting migration')
  while True:
    batch_user_ids = list(User
                          .select(User.id)
                          .where(User.uuid >> None)
                          .limit(100))

    if len(batch_user_ids) == 0:
      # There are no users left to backfill. We're done!
      LOGGER.debug('User UUID Backfill: Backfill completed')
      return

    LOGGER.debug('User UUID Backfill: Found %s records to update', len(batch_user_ids))
    for user_id in batch_user_ids:
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          user = User.select(User.id, User.uuid).where(User.id == user_id).get()
          user.uuid = str(uuid.uuid4())
          user.save(only=[User.uuid])
        except User.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_user_uuids()


@@ -1,75 +0,0 @@
import logging

from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField, fn)

from data.database import BaseModel
from util.migrate.allocator import yield_random_entries
from app import app

BATCH_SIZE = 1000

logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)
  cas_path = BooleanField(default=True)
  content_checksum = CharField(null=True, index=True)


class Image(BaseModel):
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)
  storage = ForeignKeyField(ImageStorage, index=True, null=True)
  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)
  v1_checksum = CharField(null=True)


def backfill_checksums():
  """ Copies checksums from image storages to their images. """
  logger.debug('Began execution')
  logger.debug('This may be a long operation!')

  def batch_query():
    return (Image
            .select(Image, ImageStorage)
            .join(ImageStorage)
            .where(Image.v1_checksum >> None, ImageStorage.uploading == False,
                   ~(ImageStorage.checksum >> None)))

  max_id = Image.select(fn.Max(Image.id)).scalar()

  written = 0
  for candidate_image, abort in yield_random_entries(batch_query, Image.id, BATCH_SIZE, max_id):
    num_changed = (Image
                   .update(v1_checksum=candidate_image.storage.checksum)
                   .where(Image.id == candidate_image.id, Image.v1_checksum >> None)).execute()
    if num_changed == 0:
      logger.info('Collision with another worker, aborting batch')
      abort.set()
    written += num_changed
    if (written % BATCH_SIZE) == 0:
      logger.debug('%s entries written', written)

  logger.debug('Completed, updated %s entries', written)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_checksums()


@@ -1,117 +0,0 @@
import logging

from peewee import (JOIN_LEFT_OUTER, CharField, BigIntegerField, BooleanField, ForeignKeyField,
                    DateTimeField, TextField)

from data.database import BaseModel, db, db_for_update
from app import app, storage
from data import model

logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
  # This class is intentionally denormalized. Even though images are supposed
  # to be globally unique, we can't treat them as such for permissions and
  # security reasons. So rather than Repository <-> Image being many-to-many,
  # each image now belongs to exactly one repository.
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)

  # '/'-separated list of ancestry ids, e.g. /1/2/6/7/10/
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)

  storage = ForeignKeyField(ImageStorage, index=True, null=True)

  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)


class ImageStorageLocation(BaseModel):
  name = CharField(unique=True, index=True)


class ImageStoragePlacement(BaseModel):
  storage = ForeignKeyField(ImageStorage)
  location = ForeignKeyField(ImageStorageLocation)


def image_json_path(storage_uuid):
  base_path = storage._image_path(storage_uuid)
  return '{0}json'.format(base_path)


def backfill_v1_metadata():
  """ Copies metadata from image storages to their images. """
  logger.debug('Image v1 metadata backfill: Began execution')

  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .join(ImageStorage)
                           .where(Image.v1_json_metadata >> None, ImageStorage.uploading == False)
                           .limit(100))

    if len(batch_image_ids) == 0:
      logger.debug('Image v1 metadata backfill: Backfill completed')
      return

    logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
    for one_id in batch_image_ids:
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          logger.debug('Loading image: %s', one_id.id)

          raw_query = (ImageStoragePlacement
                       .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
                       .join(ImageStorageLocation)
                       .switch(ImageStoragePlacement)
                       .join(ImageStorage, JOIN_LEFT_OUTER)
                       .join(Image)
                       .where(Image.id == one_id.id))

          placement_query = db_for_update(raw_query)

          repo_image_list = model.image.invert_placement_query_results(placement_query)
          if len(repo_image_list) > 1:
            logger.error('Found more images than we requested, something is wrong with the query')
            return

          repo_image = repo_image_list[0]
          uuid = repo_image.storage.uuid
          json_path = image_json_path(uuid)

          logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
          try:
            data = storage.get_content(repo_image.storage.locations, json_path)
          except IOError:
            data = "{}"
            logger.warning('Failed to find v1 metadata, defaulting to {}')
          repo_image.v1_json_metadata = data
          repo_image.save()
        except ImageStoragePlacement.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  # logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_v1_metadata()
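
Each row here is re-read through db_for_update inside the transaction before being written, so the row stays locked until the transaction commits. Assuming db_for_update resolves to peewee's for_update() on databases that support it (its actual implementation is not part of this diff), the helper reduces to roughly:

# Hedged sketch: what a db_for_update-style helper could look like, assuming
# it simply appends FOR UPDATE to the SELECT when not disabled.
def db_for_update(query, disable=False):
  if not disable:
    # peewee: SELECT ... FOR UPDATE; rows stay locked until the enclosing
    # transaction commits or rolls back.
    return query.for_update()
  return query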


@@ -1,52 +0,0 @@
import uuid
import logging

from peewee import IntegrityError, CharField

from app import app
from data.database import BaseModel

logger = logging.getLogger(__name__)

ENUM_CLASSES_WITH_SEQUENCES = [
  'TeamRole',
  'LoginService',
  'Visibility',
  'Role',
  'AccessTokenKind',
  'BuildTriggerService',
  'ImageStorageTransformation',
  'ImageStorageSignatureKind',
  'ImageStorageLocation',
  'LogEntryKind',
  'NotificationKind',
  'ExternalNotificationEvent',
  'ExternalNotificationMethod',
]


def reset_enum_sequences():
  for class_name in ENUM_CLASSES_WITH_SEQUENCES:
    reset_sequence(class_name)


def reset_sequence(class_name):
  logger.info('Resetting sequence for table: %s', class_name.lower())
  unique_name = '%s' % uuid.uuid4()

  # Build a throwaway model bound to the enum table so rows can be inserted
  # without importing the real model classes.
  Model = type(class_name, (BaseModel,), {'name': CharField(index=True)})
  for skips in xrange(50):
    try:
      # A failed insert still consumes a sequence value, so retrying up to 50
      # times walks the sequence past any ids already present in the table.
      Model.create(name=unique_name).delete_instance()
      logger.info('Advanced sequence %s numbers', skips)
      break
    except IntegrityError:
      pass


if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  reset_enum_sequences()
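
The type() call builds a throwaway peewee model bound to the enum table by name, so rows can be inserted without importing the real model classes; and because PostgreSQL consumes a sequence value even when an INSERT fails, each IntegrityError still advances the sequence. A runnable sketch of the dynamic-model trick against an in-memory database:

from peewee import SqliteDatabase, Model, CharField

db = SqliteDatabase(':memory:')

class Base(Model):
  class Meta:
    database = db

# Same trick as reset_sequence: build a model class at runtime whose table
# name is derived from the class name, without importing real models.
TeamRole = type('TeamRole', (Base,), {'name': CharField(index=True)})
db.create_tables([TeamRole])
row = TeamRole.create(name='example')
row.delete_instance()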


@@ -1,116 +0,0 @@
import logging
import json

from app import app
from data.database import configure, BaseModel, uuid_generator
from peewee import *
from bitbucket import BitBucket
from buildtrigger.bitbuckethandler import BitbucketBuildTrigger

configure(app.config)

logger = logging.getLogger(__name__)


# Note: We vendor the RepositoryBuildTrigger and its dependencies here
class Repository(BaseModel):
  pass


class BuildTriggerService(BaseModel):
  name = CharField(index=True, unique=True)


class AccessToken(BaseModel):
  pass


class User(BaseModel):
  pass


class RepositoryBuildTrigger(BaseModel):
  uuid = CharField(default=uuid_generator)
  service = ForeignKeyField(BuildTriggerService, index=True)
  repository = ForeignKeyField(Repository, index=True)
  connected_user = ForeignKeyField(User)
  auth_token = CharField(null=True)
  private_key = TextField(null=True)
  config = TextField(default='{}')
  write_token = ForeignKeyField(AccessToken, null=True)
  pull_robot = ForeignKeyField(User, related_name='triggerpullrobot')


def run_bitbucket_migration():
  bitbucket_trigger = BuildTriggerService.get(BuildTriggerService.name == "bitbucket")

  encountered = set()
  while True:
    found = list(RepositoryBuildTrigger.select().where(
        RepositoryBuildTrigger.service == bitbucket_trigger,
        RepositoryBuildTrigger.config ** "%\"hook_id%"))

    found = [f for f in found if f.uuid not in encountered]

    if not found:
      logger.debug('No additional records found')
      return

    logger.debug('Found %s records to be changed', len(found))
    for trigger in found:
      encountered.add(trigger.uuid)

      try:
        config = json.loads(trigger.config)
      except ValueError:
        logger.error("Cannot parse config for trigger %s", trigger.uuid)
        continue

      logger.debug("Checking trigger %s", trigger.uuid)
      if 'hook_id' in config:
        logger.debug("Updating trigger %s to a webhook", trigger.uuid)

        trigger_handler = BitbucketBuildTrigger(trigger)
        client = trigger_handler._get_repository_client()

        hook_id = config['hook_id']

        # Lookup the old service hook.
        logger.debug("Looking up old service URL for trigger %s", trigger.uuid)
        (result, hook_data, err_msg) = client.services().get(hook_id)
        if not result or not hook_data:
          logger.error('Error when retrieving service hook for trigger %s: %s',
                       trigger.uuid, err_msg)
          continue

        if 'webhook_id' not in config:
          hook_data = hook_data[0]['service']
          webhook_url = [f for f in hook_data['fields'] if f['name'] == 'URL'][0]['value']
          logger.debug("Adding webhook for trigger %s: %s", trigger.uuid, webhook_url)

          # Add the new web hook.
          description = 'Webhook for invoking builds on %s' % app.config['REGISTRY_TITLE_SHORT']
          webhook_events = ['repo:push']
          (result, data, err_msg) = client.webhooks().create(description, webhook_url,
                                                             webhook_events)
          if not result:
            logger.error('Error when adding webhook for trigger %s: %s', trigger.uuid, err_msg)
            continue

          config['webhook_id'] = data['uuid']
          trigger.config = json.dumps(config)
          trigger.save()

        # Remove the old service hook.
        logger.debug("Deleting old service URL for trigger %s", trigger.uuid)
        (result, _, err_msg) = client.services().delete(hook_id)
        if not result:
          logger.error('Error when deleting service hook for trigger %s: %s',
                       trigger.uuid, err_msg)
          continue

        del config['hook_id']

        # Update the config.
        trigger.config = json.dumps(config)
        trigger.save()
        logger.debug("Trigger %s updated to a webhook", trigger.uuid)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  run_bitbucket_migration()
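
The expression RepositoryBuildTrigger.config ** "%\"hook_id%" uses peewee's ** operator, a case-insensitive LIKE, to cheaply pre-filter for configs whose JSON text contains a "hook_id key before any parsing happens. A small self-contained illustration (throwaway model, not the vendored one above):

from peewee import SqliteDatabase, Model, TextField

db = SqliteDatabase(':memory:')

class Trigger(Model):
  config = TextField(default='{}')

  class Meta:
    database = db

db.create_tables([Trigger])
Trigger.create(config='{"hook_id": 42}')
Trigger.create(config='{"webhook_id": "abc"}')

# `**` is peewee's case-insensitive LIKE; the leading escaped quote in the
# pattern keeps "webhook_id" from matching as a substring of "hook_id".
matches = Trigger.select().where(Trigger.config ** '%"hook_id%')
assert [t.config for t in matches] == ['{"hook_id": 42}']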


@@ -1,125 +0,0 @@
import logging
import logging.config
import json

from data.database import (db, db_for_update, BaseModel, CharField, ForeignKeyField,
                           TextField, BooleanField)
from app import app
from buildtrigger.basehandler import BuildTriggerHandler
from util.security.ssh import generate_ssh_keypair
from github import GithubException

logger = logging.getLogger(__name__)


class BuildTriggerService(BaseModel):
  name = CharField(index=True, unique=True)


class Repository(BaseModel):
  pass


class User(BaseModel):
  pass


class AccessToken(BaseModel):
  pass


class RepositoryBuildTrigger(BaseModel):
  uuid = CharField()
  service = ForeignKeyField(BuildTriggerService, index=True)
  repository = ForeignKeyField(Repository, index=True)
  connected_user = ForeignKeyField(User)
  auth_token = CharField(null=True)
  private_key = TextField(null=True)
  config = TextField(default='{}')
  write_token = ForeignKeyField(AccessToken, null=True)
  pull_robot = ForeignKeyField(User, null=True, related_name='triggerpullrobot')
  used_legacy_github = BooleanField(null=True, default=False)


def backfill_github_deploykeys():
  """ Generates and saves private deploy keys for any GitHub build triggers still relying on
      the old buildpack behavior. """
  logger.setLevel(logging.DEBUG)
  logger.debug('GitHub deploy key backfill: Began execution')

  encountered = set()
  github_service = BuildTriggerService.get(name='github')

  while True:
    build_trigger_ids = list(RepositoryBuildTrigger
                             .select(RepositoryBuildTrigger.id)
                             .where(RepositoryBuildTrigger.private_key >> None)
                             .where(RepositoryBuildTrigger.service == github_service)
                             .where(RepositoryBuildTrigger.used_legacy_github >> None)
                             .limit(100))

    filtered_ids = [trigger.id for trigger in build_trigger_ids if trigger.id not in encountered]
    if len(filtered_ids) == 0:
      # We're done!
      logger.debug('GitHub deploy key backfill: Backfill completed')
      return

    logger.debug('GitHub deploy key backfill: Found %s records to update', len(filtered_ids))
    for trigger_id in filtered_ids:
      encountered.add(trigger_id)

      logger.debug('Updating build trigger: %s', trigger_id)
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          query = RepositoryBuildTrigger.select().where(RepositoryBuildTrigger.id == trigger_id)
          trigger = db_for_update(query).get()
        except RepositoryBuildTrigger.DoesNotExist:
          logger.debug('Could not find build trigger %s', trigger_id)
          continue

        trigger.used_legacy_github = True
        trigger.save()

        handler = BuildTriggerHandler.get_handler(trigger)
        config = handler.config

        if 'build_source' not in config:
          logger.debug('Could not find build source for trigger %s', trigger_id)
          continue

        build_source = config['build_source']
        gh_client = handler._get_client()

        # Find the GitHub repository.
        try:
          gh_repo = gh_client.get_repo(build_source)
        except GithubException:
          logger.exception('Cannot find repository %s for trigger %s', build_source, trigger.id)
          continue

        # Add a deploy key to the GitHub repository.
        public_key, private_key = generate_ssh_keypair()
        config['credentials'] = [
            {
                'name': 'SSH Public Key',
                'value': public_key,
            },
        ]

        logger.debug('Adding deploy key to build trigger %s', trigger.id)
        try:
          deploy_key = gh_repo.create_key('%s Builder' % app.config['REGISTRY_TITLE'], public_key)
          config['deploy_key_id'] = deploy_key.id
        except GithubException:
          logger.exception('Cannot add deploy key to repository %s for trigger %s',
                           build_source, trigger.id)
          continue

        logger.debug('Saving deploy key for trigger %s', trigger.id)
        trigger.used_legacy_github = True
        trigger.private_key = private_key
        trigger.config = json.dumps(config)
        trigger.save()


if __name__ == "__main__":
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('github').setLevel(logging.CRITICAL)
  logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
  backfill_github_deploykeys()


@@ -1,53 +0,0 @@
import logging
import json

from app import app
from data.database import configure, RepositoryNotification, ExternalNotificationMethod

configure(app.config)

logger = logging.getLogger(__name__)


def run_slackwebhook_migration():
  slack_method = ExternalNotificationMethod.get(ExternalNotificationMethod.name == "slack")

  encountered = set()
  while True:
    found = list(RepositoryNotification.select(RepositoryNotification.uuid,
                                               RepositoryNotification.config_json)
                 .where(RepositoryNotification.method == slack_method,
                        RepositoryNotification.config_json ** "%subdomain%",
                        ~(RepositoryNotification.config_json ** "%url%")))

    found = [f for f in found if f.uuid not in encountered]

    if not found:
      logger.debug('No additional records found')
      return

    logger.debug('Found %s records to be changed', len(found))
    for notification in found:
      encountered.add(notification.uuid)

      try:
        config = json.loads(notification.config_json)
      except ValueError:
        logger.error("Cannot parse config for notification %s", notification.uuid)
        continue

      logger.debug("Checking notification %s", notification.uuid)
      if 'subdomain' in config and 'token' in config:
        subdomain = config['subdomain']
        token = config['token']
        new_url = 'https://%s.slack.com/services/hooks/incoming-webhook?token=%s' % (subdomain,
                                                                                     token)
        config['url'] = new_url

        logger.debug("Updating notification %s to URL: %s", notification.uuid, new_url)
        notification.config_json = json.dumps(config)
        notification.save()


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  run_slackwebhook_migration()


@@ -1,105 +0,0 @@
import logging
import zlib

from data import model
from data.database import ImageStorage, db, db_random_func
from app import app, storage as store
from util.registry.gzipstream import ZLIB_GZIP_WINDOW

logger = logging.getLogger(__name__)

CHUNK_SIZE = 5 * 1024 * 1024


def backfill_sizes_from_data():
  logger.setLevel(logging.DEBUG)
  logger.debug('Starting uncompressed image size backfill')
  logger.debug('NOTE: This can be a LONG RUNNING OPERATION. Please wait!')

  # Check for any uncompressed images.
  has_images = bool(list(ImageStorage
                         .select(ImageStorage.uuid)
                         .where(ImageStorage.uncompressed_size >> None,
                                ImageStorage.image_size > 0,
                                ImageStorage.uploading == False)
                         .limit(1)))

  if not has_images:
    logger.debug('Uncompressed backfill: No migration needed')
    return

  logger.debug('Uncompressed backfill: Starting migration')
  encountered = set()
  while True:
    # Load the records from the DB.
    batch_ids = list(ImageStorage
                     .select(ImageStorage.uuid)
                     .where(ImageStorage.uncompressed_size >> None,
                            ImageStorage.image_size > 0,
                            ImageStorage.uploading == False)
                     .limit(100)
                     .order_by(db_random_func()))

    batch_ids = set([s.uuid for s in batch_ids]) - encountered
    logger.debug('Found %s images to process', len(batch_ids))
    if len(batch_ids) == 0:
      # We're done!
      return

    counter = 1
    for uuid in batch_ids:
      encountered.add(uuid)

      logger.debug('Processing image ID %s (%s/%s)', uuid, counter, len(batch_ids))
      counter = counter + 1

      try:
        with_locs = model.storage.get_storage_by_uuid(uuid)
        if with_locs.uncompressed_size is not None:
          logger.debug('Somebody else already filled this in for us: %s', uuid)
          continue

        # Read the layer from backing storage and calculate the uncompressed size.
        logger.debug('Loading data: %s (%s bytes)', uuid, with_locs.image_size)
        decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

        uncompressed_size = 0
        with store.stream_read_file(with_locs.locations, store.v1_image_layer_path(uuid)) as stream:
          while True:
            current_data = stream.read(CHUNK_SIZE)
            if len(current_data) == 0:
              break

            while current_data:
              uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
              current_data = decompressor.unconsumed_tail

        # Write the size to the image storage. We do so under a transaction AFTER checking to
        # make sure the image storage still exists and has not changed.
        logger.debug('Writing entry: %s. Size: %s', uuid, uncompressed_size)
        with app.config['DB_TRANSACTION_FACTORY'](db):
          current_record = model.storage.get_storage_by_uuid(uuid)

          if not current_record.uploading and current_record.uncompressed_size is None:
            current_record.uncompressed_size = uncompressed_size
            current_record.save()
          else:
            logger.debug('Somebody else already filled this in for us, after we did the work: %s',
                         uuid)
      except model.InvalidImageException:
        logger.warning('Storage with uuid no longer exists: %s', uuid)
      except IOError:
        logger.warning('IOError on %s', uuid)
      except MemoryError:
        logger.warning('MemoryError on %s', uuid)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_sizes_from_data()
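
The uncompressed size is computed without buffering a whole layer: each compressed chunk goes to a bounded zlib decompressor, and unconsumed_tail carries over whatever input did not fit. A self-contained sketch of the same loop over an in-memory gzip stream (ZLIB_GZIP_WINDOW is assumed here to be zlib's auto-detect window setting):

import io
import gzip
import zlib

ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32  # assumption: auto-detect gzip/zlib headers

def uncompressed_length(stream, chunk_size=64 * 1024):
  decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
  total = 0
  while True:
    data = stream.read(chunk_size)
    if not data:
      return total
    # Bounding max_length keeps memory flat; leftover compressed input is
    # handed back via unconsumed_tail for the next pass.
    while data:
      total += len(decompressor.decompress(data, chunk_size))
      data = decompressor.unconsumed_tail

payload = b'x' * 1000000
compressed = io.BytesIO()
with gzip.GzipFile(fileobj=compressed, mode='wb') as gz:
  gz.write(payload)
compressed.seek(0)

assert uncompressed_length(compressed) == len(payload)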