Collapse all migrations prior to 2.0.0 into one.
This commit is contained in: parent 6de039dc97, commit b5834a8a66

105 changed files with 957 additions and 4692 deletions
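The deletions below are the one-off backfill and data-migration scripts that accompanied the pre-2.0.0 schema migrations; with those migrations collapsed into a single base revision, the scripts are no longer needed. As a rough illustration of what "collapsing" means here, a collapsed base migration creates the final schema in one revision rather than replaying every increment. The sketch below assumes Alembic (which the data/migrations directory uses); the revision identifiers and the single table shown are invented purely for illustration.

import sqlalchemy as sa
from alembic import op

# Hypothetical identifiers for a collapsed base revision.
revision = 'collapsed_base'
down_revision = None


def upgrade():
  # Schema that older, now-deleted migrations built incrementally is created in one shot.
  op.create_table(
      'imagestorage',
      sa.Column('id', sa.Integer, primary_key=True),
      sa.Column('uuid', sa.String(length=36), nullable=False, unique=True),
      sa.Column('image_size', sa.BigInteger, nullable=True),
      sa.Column('uncompressed_size', sa.BigInteger, nullable=True),
  )


def downgrade():
  op.drop_table('imagestorage')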
@@ -1,55 +0,0 @@
import logging

from data.database import ImageStorage, Image, db, db_for_update
from app import app


logger = logging.getLogger(__name__)


def backfill_aggregate_sizes():
  """ Generates aggregate sizes for any image storage entries without them """
  logger.debug('Aggregate sizes backfill: Began execution')
  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .where(Image.aggregate_size >> None)
                           .limit(100))

    if len(batch_image_ids) == 0:
      # There are no storages left to backfill. We're done!
      logger.debug('Aggregate sizes backfill: Backfill completed')
      return

    logger.debug('Aggregate sizes backfill: Found %s records to update', len(batch_image_ids))
    for image_id in batch_image_ids:
      logger.debug('Updating image : %s', image_id.id)

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          image = (Image
                   .select(Image, ImageStorage)
                   .join(ImageStorage)
                   .where(Image.id == image_id)
                   .get())

          aggregate_size = image.storage.image_size

          image_ids = image.ancestor_id_list()
          for image_id in image_ids:
            to_add = db_for_update(Image
                                   .select(Image, ImageStorage)
                                   .join(ImageStorage)
                                   .where(Image.id == image_id)).get()
            aggregate_size += to_add.storage.image_size

          image.aggregate_size = aggregate_size
          image.save()
        except Image.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_aggregate_sizes()
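Several of the deleted scripts, including the one above, share the same loop: select up to 100 rows whose target column is still NULL, fix each one inside a short transaction, and repeat until the query comes back empty. The following is a minimal, self-contained sketch of that loop shape, using sqlite3 purely for illustration; the table layout is hypothetical and the real script's ancestor-summing logic is elided.

import sqlite3


def backfill_in_batches(conn, batch_size=100):
  while True:
    # Grab a small batch of rows still missing the computed column.
    rows = conn.execute(
        'SELECT id FROM image WHERE aggregate_size IS NULL LIMIT ?',
        (batch_size,)).fetchall()
    if not rows:
      return  # Nothing left to backfill.

    for (row_id,) in rows:
      # Each row is fixed up inside its own short transaction.
      with conn:
        conn.execute(
            '''UPDATE image
               SET aggregate_size = (SELECT COALESCE(SUM(image_size), 0)
                                     FROM image_storage
                                     WHERE image_storage.id = image.storage_id)
               WHERE id = ? AND aggregate_size IS NULL''', (row_id,))


if __name__ == '__main__':
  conn = sqlite3.connect(':memory:')
  conn.executescript('''
    CREATE TABLE image_storage (id INTEGER PRIMARY KEY, image_size INTEGER);
    CREATE TABLE image (id INTEGER PRIMARY KEY, storage_id INTEGER, aggregate_size INTEGER);
    INSERT INTO image_storage VALUES (1, 123), (2, 456);
    INSERT INTO image VALUES (1, 1, NULL), (2, 2, NULL);
  ''')
  backfill_in_batches(conn)
  print(conn.execute('SELECT id, aggregate_size FROM image').fetchall())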
@@ -1,145 +0,0 @@
import logging

from peewee import (JOIN_LEFT_OUTER, CharField, BigIntegerField, BooleanField, ForeignKeyField,
                    IntegerField, IntegrityError, fn)

from data.database import BaseModel, CloseForLongOperation
from data.fields import Base64BinaryField
from app import app, storage
from digest import checksums
from util.migrate.allocator import yield_random_entries
from util.registry.torrent import PieceHasher
from util.registry.filelike import wrap_with_handler


BATCH_SIZE = 1000


logger = logging.getLogger(__name__)


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)
  cas_path = BooleanField(default=True)
  content_checksum = CharField(null=True, index=True)


class ImageStorageLocation(BaseModel):
  name = CharField(unique=True, index=True)


class ImageStoragePlacement(BaseModel):
  storage = ForeignKeyField(ImageStorage)
  location = ForeignKeyField(ImageStorageLocation)


class TorrentInfo(BaseModel):
  storage = ForeignKeyField(ImageStorage)
  piece_length = IntegerField()
  pieces = Base64BinaryField()


def _get_image_storage_locations(storage_id):
  placements_query = (ImageStoragePlacement
                      .select(ImageStoragePlacement, ImageStorageLocation)
                      .join(ImageStorageLocation)
                      .switch(ImageStoragePlacement)
                      .join(ImageStorage, JOIN_LEFT_OUTER)
                      .where(ImageStorage.id == storage_id))

  locations = set()
  for placement in placements_query:
    locations.add(placement.location.name)

  return locations


def _get_layer_path(storage_record):
  """ Returns the path in the storage engine to the layer data referenced by the storage row. """
  if not storage_record.cas_path:
    logger.debug('Serving layer from legacy v1 path: %s', storage_record.uuid)
    return storage.v1_image_layer_path(storage_record.uuid)
  return storage.blob_path(storage_record.content_checksum)


def backfill_content_checksums_and_torrent_pieces(piece_length):
  """ Hashes the entire file for the content associated with an imagestorage. """
  logger.debug('Began execution')
  logger.debug('This may be a long operation!')

  def batch_query():
    return (ImageStorage
            .select(ImageStorage.id, ImageStorage.uuid, ImageStorage.content_checksum,
                    ImageStorage.cas_path)
            .join(TorrentInfo, JOIN_LEFT_OUTER, on=((TorrentInfo.storage == ImageStorage.id) &
                                                    (TorrentInfo.piece_length == piece_length)))
            .where((TorrentInfo.id >> None) | (ImageStorage.content_checksum >> None)))

  max_id = ImageStorage.select(fn.Max(ImageStorage.id)).scalar()

  checksums_written = 0
  pieces_written = 0
  for candidate_storage, abort in yield_random_entries(batch_query, ImageStorage.id, BATCH_SIZE,
                                                       max_id):
    locations = _get_image_storage_locations(candidate_storage.id)

    checksum = candidate_storage.content_checksum
    torrent_pieces = ''
    with CloseForLongOperation(app.config):
      try:
        # Compute the checksum
        layer_path = _get_layer_path(candidate_storage)
        with storage.stream_read_file(locations, layer_path) as layer_data_handle:
          hasher = PieceHasher(piece_length)
          wrapped = wrap_with_handler(layer_data_handle, hasher.update)
          checksum = 'sha256:{0}'.format(checksums.sha256_file(wrapped))
          torrent_pieces = hasher.final_piece_hashes()
      except Exception as exc:
        logger.exception('Unable to compute hashes for storage: %s', candidate_storage.uuid)

        # Create a fallback value for the checksum
        if checksum is None:
          checksum = 'unknown:{0}'.format(exc.__class__.__name__)

    torrent_collision = False
    checksum_collision = False

    # Now update the ImageStorage with the checksum
    num_updated = (ImageStorage
                   .update(content_checksum=checksum)
                   .where(ImageStorage.id == candidate_storage.id,
                          ImageStorage.content_checksum >> None)).execute()
    checksums_written += num_updated
    if num_updated == 0:
      checksum_collision = True

    try:
      TorrentInfo.create(storage=candidate_storage.id, piece_length=piece_length,
                         pieces=torrent_pieces)
      pieces_written += 1
    except IntegrityError:
      torrent_collision = True

    if torrent_collision and checksum_collision:
      logger.info('Another worker pre-empted us for storage: %s', candidate_storage.uuid)
      abort.set()

    if (pieces_written % BATCH_SIZE) == 0 or (checksums_written % BATCH_SIZE) == 0:
      logger.debug('%s checksums written, %s torrent pieces written', checksums_written,
                   pieces_written)

  logger.debug('Completed, %s checksums written, %s torrent pieces written', checksums_written,
               pieces_written)


if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  #logging.getLogger('peewee').setLevel(logging.WARNING)
  logging.getLogger('boto').setLevel(logging.WARNING)
  logging.getLogger('data.database').setLevel(logging.WARNING)
  backfill_content_checksums_and_torrent_pieces(app.config['BITTORRENT_PIECE_SIZE'])
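The script above is written to run on several workers at once: each candidate row is claimed with a conditional UPDATE, a result of zero changed rows means another worker already wrote the value, and when both the checksum and torrent writes collide the abort event tells yield_random_entries to abandon the current batch. Below is a small sketch of that claim-and-check step, with sqlite3 and a threading.Event standing in for the real models and allocator; the table layout is an illustrative assumption, not Quay's schema.

import sqlite3
import threading

abort = threading.Event()


def claim_and_fill(conn, storage_id, checksum):
  # Only write if the column is still NULL; rowcount tells us whether we won the row.
  changed = conn.execute(
      'UPDATE image_storage SET content_checksum = ? '
      'WHERE id = ? AND content_checksum IS NULL',
      (checksum, storage_id)).rowcount
  if changed == 0:
    # Another worker pre-empted us; signal the batch generator to move on.
    abort.set()
  return changed


if __name__ == '__main__':
  conn = sqlite3.connect(':memory:')
  conn.execute('CREATE TABLE image_storage (id INTEGER PRIMARY KEY, content_checksum TEXT)')
  conn.execute('INSERT INTO image_storage VALUES (1, NULL)')
  print(claim_and_fill(conn, 1, 'sha256:abc'))  # 1 -> we won the row
  print(claim_and_fill(conn, 1, 'sha256:def'))  # 0 -> someone else already wrote it
  print(abort.is_set())                         # True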
@@ -1,87 +0,0 @@
import logging

from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField)
from data.database import BaseModel, db, db_for_update
from app import app


logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
  # This class is intentionally denormalized. Even though images are supposed
  # to be globally unique we can't treat them as such for permissions and
  # security reasons. So rather than Repository <-> Image being many to many
  # each image now belongs to exactly one repository.
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)

  # '/' separated list of ancestory ids, e.g. /1/2/6/7/10/
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)

  storage = ForeignKeyField(ImageStorage, index=True, null=True)

  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)


def backfill_image_fields():
  """ Copies metadata from image storages to their images. """
  logger.debug('Image metadata backfill: Began execution')
  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .join(ImageStorage)
                           .where(Image.created >> None, Image.comment >> None,
                                  Image.command >> None, Image.aggregate_size >> None,
                                  ImageStorage.uploading == False,
                                  ~((ImageStorage.created >> None) &
                                    (ImageStorage.comment >> None) &
                                    (ImageStorage.command >> None) &
                                    (ImageStorage.aggregate_size >> None)))
                           .limit(100))

    if len(batch_image_ids) == 0:
      logger.debug('Image metadata backfill: Backfill completed')
      return

    logger.debug('Image metadata backfill: Found %s records to update', len(batch_image_ids))
    for image_id in batch_image_ids:
      logger.debug('Updating image: %s', image_id.id)

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          image = db_for_update(Image
                                .select(Image, ImageStorage)
                                .join(ImageStorage)
                                .where(Image.id == image_id.id)).get()

          image.created = image.storage.created
          image.comment = image.storage.comment
          image.command = image.storage.command
          image.aggregate_size = image.storage.aggregate_size
          image.save()
        except Image.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_image_fields()
@@ -1,82 +0,0 @@
import logging

from data.database import BaseModel
from peewee import (fn, CharField, BigIntegerField, ForeignKeyField, BooleanField, DateTimeField,
                    TextField, IntegerField)
from app import app
from util.migrate.allocator import yield_random_entries


BATCH_SIZE = 1000


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)
  cas_path = BooleanField(default=True)
  content_checksum = CharField(null=True, index=True)


class Image(BaseModel):
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)
  storage = ForeignKeyField(ImageStorage, index=True, null=True)
  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)
  v1_checksum = CharField(null=True)

  security_indexed = BooleanField(default=False)
  security_indexed_engine = IntegerField(default=-1)
  parent_id = IntegerField(index=True, null=True)


logger = logging.getLogger(__name__)


def backfill_parent_id():
  logger.setLevel(logging.DEBUG)

  logger.debug('backfill_parent_id: Starting')
  logger.debug('backfill_parent_id: This can be a LONG RUNNING OPERATION. Please wait!')

  def fetch_batch():
    return (Image
            .select(Image.id, Image.ancestors)
            .join(ImageStorage)
            .where(Image.parent_id >> None, Image.ancestors != '/',
                   ImageStorage.uploading == False))

  max_id = Image.select(fn.Max(Image.id)).scalar()

  written = 0
  for to_backfill, abort in yield_random_entries(fetch_batch, Image.id, BATCH_SIZE, max_id):
    computed_parent = int(to_backfill.ancestors.split('/')[-2])
    num_changed = (Image
                   .update(parent_id=computed_parent)
                   .where(Image.id == to_backfill.id, Image.parent_id >> None)).execute()
    if num_changed == 0:
      logger.info('Collision with another worker, aborting batch')
      abort.set()
    written += num_changed
    if (written % BATCH_SIZE) == 0:
      logger.debug('%s entries written', written)

  logger.debug('backfill_parent_id: Completed, updated %s entries', written)


if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)

  backfill_parent_id()
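The parent computation above relies entirely on the ancestors string format documented in the other vendored models ('/' separated list of ancestor ids, e.g. /1/2/6/7/10/). A one-line worked example of the indexing:

# For '/1/2/6/7/10/', split('/') gives ['', '1', '2', '6', '7', '10', ''],
# so index [-2] is the nearest ancestor, i.e. the direct parent.
ancestors = '/1/2/6/7/10/'
print(int(ancestors.split('/')[-2]))  # 10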
@@ -1,54 +0,0 @@
import logging
import uuid

from data.database import User, db
from app import app

LOGGER = logging.getLogger(__name__)


def backfill_user_uuids():
  """ Generates UUIDs for any Users without them. """
  LOGGER.setLevel(logging.DEBUG)
  LOGGER.debug('User UUID Backfill: Began execution')

  # Check to see if any users are missing uuids.
  has_missing_uuids = True
  try:
    User.select(User.id).where(User.uuid >> None).get()
  except User.DoesNotExist:
    has_missing_uuids = False

  if not has_missing_uuids:
    LOGGER.debug('User UUID Backfill: No migration needed')
    return

  LOGGER.debug('User UUID Backfill: Starting migration')
  while True:
    batch_user_ids = list(User
                          .select(User.id)
                          .where(User.uuid >> None)
                          .limit(100))

    if len(batch_user_ids) == 0:
      # There are no users left to backfill. We're done!
      LOGGER.debug('User UUID Backfill: Backfill completed')
      return

    LOGGER.debug('User UUID Backfill: Found %s records to update', len(batch_user_ids))
    for user_id in batch_user_ids:
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          user = User.select(User.id, User.uuid).where(User.id == user_id).get()
          user.uuid = str(uuid.uuid4())
          user.save(only=[User.uuid])
        except User.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)

  backfill_user_uuids()
@@ -1,75 +0,0 @@
import logging

from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField, fn)
from data.database import BaseModel
from util.migrate.allocator import yield_random_entries
from app import app


BATCH_SIZE = 1000


logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)
  cas_path = BooleanField(default=True)
  content_checksum = CharField(null=True, index=True)


class Image(BaseModel):
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)
  storage = ForeignKeyField(ImageStorage, index=True, null=True)
  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)
  v1_checksum = CharField(null=True)


def backfill_checksums():
  """ Copies checksums from image storages to their images. """
  logger.debug('Began execution')
  logger.debug('This may be a long operation!')
  def batch_query():
    return (Image
            .select(Image, ImageStorage)
            .join(ImageStorage)
            .where(Image.v1_checksum >> None, ImageStorage.uploading == False,
                   ~(ImageStorage.checksum >> None)))

  max_id = Image.select(fn.Max(Image.id)).scalar()

  written = 0
  for candidate_image, abort in yield_random_entries(batch_query, Image.id, BATCH_SIZE, max_id):
    num_changed = (Image
                   .update(v1_checksum=candidate_image.storage.checksum)
                   .where(Image.id == candidate_image.id, Image.v1_checksum >> None)).execute()
    if num_changed == 0:
      logger.info('Collision with another worker, aborting batch')
      abort.set()
    written += num_changed
    if (written % BATCH_SIZE) == 0:
      logger.debug('%s entries written', written)

  logger.debug('Completed, updated %s entries', written)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_checksums()
@@ -1,117 +0,0 @@
import logging

from peewee import JOIN_LEFT_OUTER

from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField)

from data.database import BaseModel, db, db_for_update
from app import app, storage
from data import model


logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  uuid = CharField(index=True, unique=True)
  checksum = CharField(null=True)
  image_size = BigIntegerField(null=True)
  uncompressed_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
  # This class is intentionally denormalized. Even though images are supposed
  # to be globally unique we can't treat them as such for permissions and
  # security reasons. So rather than Repository <-> Image being many to many
  # each image now belongs to exactly one repository.
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)

  # '/' separated list of ancestory ids, e.g. /1/2/6/7/10/
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)

  storage = ForeignKeyField(ImageStorage, index=True, null=True)

  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)


class ImageStorageLocation(BaseModel):
  name = CharField(unique=True, index=True)


class ImageStoragePlacement(BaseModel):
  storage = ForeignKeyField(ImageStorage)
  location = ForeignKeyField(ImageStorageLocation)


def image_json_path(storage_uuid):
  base_path = storage._image_path(storage_uuid)
  return '{0}json'.format(base_path)


def backfill_v1_metadata():
  """ Copies metadata from image storages to their images. """
  logger.debug('Image v1 metadata backfill: Began execution')

  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .join(ImageStorage)
                           .where(Image.v1_json_metadata >> None, ImageStorage.uploading == False)
                           .limit(100))

    if len(batch_image_ids) == 0:
      logger.debug('Image v1 metadata backfill: Backfill completed')
      return

    logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
    for one_id in batch_image_ids:
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          logger.debug('Loading image: %s', one_id.id)

          raw_query = (ImageStoragePlacement
                       .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
                       .join(ImageStorageLocation)
                       .switch(ImageStoragePlacement)
                       .join(ImageStorage, JOIN_LEFT_OUTER)
                       .join(Image)
                       .where(Image.id == one_id.id))

          placement_query = db_for_update(raw_query)

          repo_image_list = model.image.invert_placement_query_results(placement_query)
          if len(repo_image_list) > 1:
            logger.error('Found more images than we requested, something is wrong with the query')
            return

          repo_image = repo_image_list[0]
          uuid = repo_image.storage.uuid
          json_path = image_json_path(uuid)

          logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
          try:
            data = storage.get_content(repo_image.storage.locations, json_path)
          except IOError:
            data = "{}"
            logger.warning('failed to find v1 metadata, defaulting to {}')
          repo_image.v1_json_metadata = data
          repo_image.save()
        except ImageStoragePlacement.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  # logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_v1_metadata()
@@ -1,52 +0,0 @@
import uuid
import logging

from peewee import IntegrityError, CharField

from app import app
from data.database import BaseModel


logger = logging.getLogger(__name__)


ENUM_CLASSES_WITH_SEQUENCES = [
  'TeamRole',
  'LoginService',
  'Visibility',
  'Role',
  'AccessTokenKind',
  'BuildTriggerService',
  'ImageStorageTransformation',
  'ImageStorageSignatureKind',
  'ImageStorageLocation',
  'LogEntryKind',
  'NotificationKind',
  'ExternalNotificationEvent',
  'ExternalNotificationMethod',
]


def reset_enum_sequences():
  for class_name in ENUM_CLASSES_WITH_SEQUENCES:
    reset_sequence(class_name)


def reset_sequence(class_name):
  logger.info('Resetting sequence for table: %s', class_name.lower())
  unique_name = '%s' % uuid.uuid4()

  Model = type(class_name, (BaseModel,), {'name': CharField(index=True)})

  for skips in xrange(50):
    try:
      Model.create(name=unique_name).delete_instance()
      logger.info('Advanced sequence %s numbers', skips)
      break
    except IntegrityError:
      pass


if __name__ == '__main__':
  logging.basicConfig(level=logging.DEBUG)
  reset_enum_sequences()
@@ -1,116 +0,0 @@
import logging
import json

from app import app
from data.database import configure, BaseModel, uuid_generator
from peewee import *
from bitbucket import BitBucket
from buildtrigger.bitbuckethandler import BitbucketBuildTrigger

configure(app.config)

logger = logging.getLogger(__name__)


# Note: We vendor the RepositoryBuildTrigger and its dependencies here
class Repository(BaseModel):
  pass


class BuildTriggerService(BaseModel):
  name = CharField(index=True, unique=True)


class AccessToken(BaseModel):
  pass


class User(BaseModel):
  pass


class RepositoryBuildTrigger(BaseModel):
  uuid = CharField(default=uuid_generator)
  service = ForeignKeyField(BuildTriggerService, index=True)
  repository = ForeignKeyField(Repository, index=True)
  connected_user = ForeignKeyField(User)
  auth_token = CharField(null=True)
  private_key = TextField(null=True)
  config = TextField(default='{}')
  write_token = ForeignKeyField(AccessToken, null=True)
  pull_robot = ForeignKeyField(User, related_name='triggerpullrobot')


def run_bitbucket_migration():
  bitbucket_trigger = BuildTriggerService.get(BuildTriggerService.name == "bitbucket")

  encountered = set()
  while True:
    found = list(RepositoryBuildTrigger.select().where(
        RepositoryBuildTrigger.service == bitbucket_trigger,
        RepositoryBuildTrigger.config ** "%\"hook_id%"))

    found = [f for f in found if not f.uuid in encountered]

    if not found:
      logger.debug('No additional records found')
      return

    logger.debug('Found %s records to be changed', len(found))
    for trigger in found:
      encountered.add(trigger.uuid)

      try:
        config = json.loads(trigger.config)
      except:
        logging.error("Cannot parse config for trigger %s", trigger.uuid)
        continue

      logger.debug("Checking trigger %s", trigger.uuid)
      if 'hook_id' in config:
        logger.debug("Updating trigger %s to a webhook", trigger.uuid)

        trigger_handler = BitbucketBuildTrigger(trigger)
        client = trigger_handler._get_repository_client()

        hook_id = config['hook_id']

        # Lookup the old service hook.
        logger.debug("Looking up old service URL for trigger %s", trigger.uuid)
        (result, hook_data, err_msg) = client.services().get(hook_id)
        if not result or not hook_data:
          logger.error('Error when retrieving service hook for trigger %s: %s', trigger.uuid, err_msg)
          continue

        if not 'webhook_id' in config:
          hook_data = hook_data[0]['service']
          webhook_url = [f for f in hook_data['fields'] if f['name'] == 'URL'][0]['value']
          logger.debug("Adding webhook for trigger %s: %s", trigger.uuid, webhook_url)

          # Add the new web hook.
          description = 'Webhook for invoking builds on %s' % app.config['REGISTRY_TITLE_SHORT']
          webhook_events = ['repo:push']
          (result, data, err_msg) = client.webhooks().create(description, webhook_url, webhook_events)
          if not result:
            logger.error('Error when adding webhook for trigger %s: %s', trigger.uuid, err_msg)
            continue

          config['webhook_id'] = data['uuid']
          trigger.config = json.dumps(config)
          trigger.save()

        # Remove the old service hook.
        logger.debug("Deleting old service URL for trigger %s", trigger.uuid)
        (result, _, err_msg) = client.services().delete(hook_id)
        if not result:
          logger.error('Error when deleting service hook for trigger %s: %s', trigger.uuid, err_msg)
          continue

        del config['hook_id']

        # Update the config.
        trigger.config = json.dumps(config)
        trigger.save()
        logger.debug("Trigger %s updated to a webhook", trigger.uuid)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)

  run_bitbucket_migration()
@@ -1,125 +0,0 @@
import logging
import logging.config
import json

from data.database import (db, db_for_update, BaseModel, CharField, ForeignKeyField,
                           TextField, BooleanField)
from app import app
from buildtrigger.basehandler import BuildTriggerHandler
from util.security.ssh import generate_ssh_keypair
from github import GithubException

logger = logging.getLogger(__name__)


class BuildTriggerService(BaseModel):
  name = CharField(index=True, unique=True)


class Repository(BaseModel):
  pass


class User(BaseModel):
  pass


class AccessToken(BaseModel):
  pass


class RepositoryBuildTrigger(BaseModel):
  uuid = CharField()
  service = ForeignKeyField(BuildTriggerService, index=True)
  repository = ForeignKeyField(Repository, index=True)
  connected_user = ForeignKeyField(User)
  auth_token = CharField(null=True)
  private_key = TextField(null=True)
  config = TextField(default='{}')
  write_token = ForeignKeyField(AccessToken, null=True)
  pull_robot = ForeignKeyField(User, null=True, related_name='triggerpullrobot')

  used_legacy_github = BooleanField(null=True, default=False)


def backfill_github_deploykeys():
  """ Generates and saves private deploy keys for any GitHub build triggers still relying on
      the old buildpack behavior. """
  logger.setLevel(logging.DEBUG)
  logger.debug('GitHub deploy key backfill: Began execution')

  encountered = set()
  github_service = BuildTriggerService.get(name='github')

  while True:
    build_trigger_ids = list(RepositoryBuildTrigger
                             .select(RepositoryBuildTrigger.id)
                             .where(RepositoryBuildTrigger.private_key >> None)
                             .where(RepositoryBuildTrigger.service == github_service)
                             .where(RepositoryBuildTrigger.used_legacy_github >> None)
                             .limit(100))

    filtered_ids = [trigger.id for trigger in build_trigger_ids if trigger.id not in encountered]
    if len(filtered_ids) == 0:
      # We're done!
      logger.debug('GitHub deploy key backfill: Backfill completed')
      return

    logger.debug('GitHub deploy key backfill: Found %s records to update', len(filtered_ids))
    for trigger_id in filtered_ids:
      encountered.add(trigger_id)
      logger.debug('Updating build trigger: %s', trigger_id)

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          query = RepositoryBuildTrigger.select().where(RepositoryBuildTrigger.id == trigger_id)
          trigger = db_for_update(query).get()
        except RepositoryBuildTrigger.DoesNotExist:
          logger.debug('Could not find build trigger %s', trigger_id)
          continue

        trigger.used_legacy_github = True
        trigger.save()

        handler = BuildTriggerHandler.get_handler(trigger)

        config = handler.config
        if not 'build_source' in config:
          logger.debug('Could not find build source for trigger %s', trigger_id)
          continue

        build_source = config['build_source']
        gh_client = handler._get_client()

        # Find the GitHub repository.
        try:
          gh_repo = gh_client.get_repo(build_source)
        except GithubException:
          logger.exception('Cannot find repository %s for trigger %s', build_source, trigger.id)
          continue

        # Add a deploy key to the GitHub repository.
        public_key, private_key = generate_ssh_keypair()
        config['credentials'] = [
          {
            'name': 'SSH Public Key',
            'value': public_key,
          },
        ]

        logger.debug('Adding deploy key to build trigger %s', trigger.id)
        try:
          deploy_key = gh_repo.create_key('%s Builder' % app.config['REGISTRY_TITLE'], public_key)
          config['deploy_key_id'] = deploy_key.id
        except GithubException:
          logger.exception('Cannot add deploy key to repository %s for trigger %s', build_source, trigger.id)
          continue

        logger.debug('Saving deploy key for trigger %s', trigger.id)
        trigger.used_legacy_github = True
        trigger.private_key = private_key
        trigger.config = json.dumps(config)
        trigger.save()


if __name__ == "__main__":
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('github').setLevel(logging.CRITICAL)

  logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
  backfill_github_deploykeys()
@@ -1,53 +0,0 @@
import logging
import json

from app import app
from data.database import configure, RepositoryNotification, ExternalNotificationMethod

configure(app.config)

logger = logging.getLogger(__name__)


def run_slackwebhook_migration():
  slack_method = ExternalNotificationMethod.get(ExternalNotificationMethod.name == "slack")

  encountered = set()
  while True:
    found = list(RepositoryNotification.select(RepositoryNotification.uuid,
                                               RepositoryNotification.config_json)
                 .where(RepositoryNotification.method == slack_method,
                        RepositoryNotification.config_json ** "%subdomain%",
                        ~(RepositoryNotification.config_json ** "%url%")))

    found = [f for f in found if not f.uuid in encountered]

    if not found:
      logger.debug('No additional records found')
      return

    logger.debug('Found %s records to be changed', len(found))
    for notification in found:
      encountered.add(notification.uuid)

      try:
        config = json.loads(notification.config_json)
      except:
        logging.error("Cannot parse config for noticification %s", notification.uuid)
        continue

      logger.debug("Checking notification %s", notification.uuid)
      if 'subdomain' in config and 'token' in config:
        subdomain = config['subdomain']
        token = config['token']
        new_url = 'https://%s.slack.com/services/hooks/incoming-webhook?token=%s' % (subdomain, token)
        config['url'] = new_url

        logger.debug("Updating notification %s to URL: %s", notification.uuid, new_url)
        notification.config_json = json.dumps(config)
        notification.save()


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)

  run_slackwebhook_migration()
@@ -1,105 +0,0 @@
import logging
import zlib

from data import model
from data.database import ImageStorage
from app import app, storage as store
from data.database import db, db_random_func
from util.registry.gzipstream import ZLIB_GZIP_WINDOW


logger = logging.getLogger(__name__)


CHUNK_SIZE = 5 * 1024 * 1024


def backfill_sizes_from_data():
  logger.setLevel(logging.DEBUG)

  logger.debug('Starting uncompressed image size backfill')
  logger.debug('NOTE: This can be a LONG RUNNING OPERATION. Please wait!')

  # Check for any uncompressed images.
  has_images = bool(list(ImageStorage
                         .select(ImageStorage.uuid)
                         .where(ImageStorage.uncompressed_size >> None,
                                ImageStorage.image_size > 0,
                                ImageStorage.uploading == False)
                         .limit(1)))

  if not has_images:
    logger.debug('Uncompressed backfill: No migration needed')
    return

  logger.debug('Uncompressed backfill: Starting migration')
  encountered = set()
  while True:
    # Load the record from the DB.
    batch_ids = list(ImageStorage
                     .select(ImageStorage.uuid)
                     .where(ImageStorage.uncompressed_size >> None,
                            ImageStorage.image_size > 0,
                            ImageStorage.uploading == False)
                     .limit(100)
                     .order_by(db_random_func()))

    batch_ids = set([s.uuid for s in batch_ids]) - encountered
    logger.debug('Found %s images to process', len(batch_ids))
    if len(batch_ids) == 0:
      # We're done!
      return

    counter = 1
    for uuid in batch_ids:
      encountered.add(uuid)

      logger.debug('Processing image ID %s (%s/%s)', uuid, counter, len(batch_ids))
      counter = counter + 1

      try:
        with_locs = model.storage.get_storage_by_uuid(uuid)
        if with_locs.uncompressed_size is not None:
          logger.debug('Somebody else already filled this in for us: %s', uuid)
          continue

        # Read the layer from backing storage and calculate the uncompressed size.
        logger.debug('Loading data: %s (%s bytes)', uuid, with_locs.image_size)
        decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

        uncompressed_size = 0
        with store.stream_read_file(with_locs.locations, store.v1_image_layer_path(uuid)) as stream:
          while True:
            current_data = stream.read(CHUNK_SIZE)
            if len(current_data) == 0:
              break

            while current_data:
              uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
              current_data = decompressor.unconsumed_tail

        # Write the size to the image storage. We do so under a transaction AFTER checking to
        # make sure the image storage still exists and has not changed.
        logger.debug('Writing entry: %s. Size: %s', uuid, uncompressed_size)
        with app.config['DB_TRANSACTION_FACTORY'](db):
          current_record = model.storage.get_storage_by_uuid(uuid)

          if not current_record.uploading and current_record.uncompressed_size == None:
            current_record.uncompressed_size = uncompressed_size
            current_record.save()
          else:
            logger.debug('Somebody else already filled this in for us, after we did the work: %s',
                         uuid)

      except model.InvalidImageException:
        logger.warning('Storage with uuid no longer exists: %s', uuid)
      except IOError:
        logger.warning('IOError on %s', uuid)
      except MemoryError:
        logger.warning('MemoryError on %s', uuid)


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('boto').setLevel(logging.CRITICAL)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)

  backfill_sizes_from_data()
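The size calculation above streams each layer through a zlib decompressor and only counts the output, so the uncompressed layer never has to fit in memory. Below is a self-contained sketch of that measurement loop; the in-memory gzip blob and the 32 KiB chunk size are illustrative assumptions, and ZLIB_GZIP_WINDOW is defined inline to mirror the imported helper constant.

import gzip
import io
import zlib

ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 16  # accept a gzip header, mirroring the helper constant


def measure_uncompressed_size(stream, chunk_size=32 * 1024):
  decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
  uncompressed_size = 0
  while True:
    current_data = stream.read(chunk_size)
    if not current_data:
      break
    # Bound the output of each decompress call; leftover input is carried
    # in unconsumed_tail until the chunk is fully drained.
    while current_data:
      uncompressed_size += len(decompressor.decompress(current_data, chunk_size))
      current_data = decompressor.unconsumed_tail
  return uncompressed_size


if __name__ == '__main__':
  payload = b'x' * 1000000
  compressed = io.BytesIO(gzip.compress(payload))
  print(measure_uncompressed_size(compressed))  # 1000000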