Merge remote-tracking branch 'upstream/v2-phase4' into python-registry-v2
commit e7a6176594
105 changed files with 4439 additions and 2074 deletions
76 util/dict_wrappers.py Normal file

@@ -0,0 +1,76 @@
import json
from jsonpath_rw import parse

class SafeDictSetter(object):
  """ Specialized write-only dictionary wrapper class that allows for setting
      nested keys via a path syntax.

      Example:
        sds = SafeDictSetter()
        sds['foo.bar.baz'] = 'hello' # Sets 'foo' = {'bar': {'baz': 'hello'}}
        sds['somekey'] = None # Does not set the key since the value is None
  """
  def __init__(self, initial_object=None):
    self._object = initial_object or {}

  def __setitem__(self, path, value):
    self.set(path, value)

  def set(self, path, value, allow_none=False):
    """ Sets the value of the given path to the given value. """
    if value is None and not allow_none:
      return

    pieces = path.split('.')
    current = self._object

    for piece in pieces[:len(pieces)-1]:
      current_obj = current.get(piece, {})
      if not isinstance(current_obj, dict):
        raise Exception('Key %s is a non-object value: %s' % (piece, current_obj))

      current[piece] = current_obj
      current = current_obj

    current[pieces[-1]] = value

  def dict_value(self):
    """ Returns the dict value built. """
    return self._object

  def json_value(self):
    """ Returns the JSON string value of the dictionary built. """
    return json.dumps(self._object)


class JSONPathDict(object):
  """ Specialized read-only dictionary wrapper class that uses the jsonpath_rw library
      to access keys via an X-Path-like syntax.

      Example:
        pd = JSONPathDict({'hello': {'hi': 'there'}})
        pd['hello.hi'] # Returns 'there'
  """
  def __init__(self, dict_value):
    """ Init the helper with the JSON object.
    """
    self._object = dict_value

  def __getitem__(self, path):
    return self.get(path)

  def get(self, path, not_found_handler=None):
    """ Returns the value found at the given path. Path is a json-path expression. """
    jsonpath_expr = parse(path)
    matches = jsonpath_expr.find(self._object)
    if not matches:
      return not_found_handler() if not_found_handler else None

    match = matches[0].value
    if not match:
      return not_found_handler() if not_found_handler else None

    if isinstance(match, dict):
      return JSONPathDict(match)

    return match
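
A small usage sketch of the two wrappers above (a standalone illustration, not part of the diff; it assumes util/dict_wrappers.py is on the import path and the jsonpath_rw package is installed):

from util.dict_wrappers import SafeDictSetter, JSONPathDict

sds = SafeDictSetter()
sds['foo.bar.baz'] = 'hello'            # builds {'foo': {'bar': {'baz': 'hello'}}}
sds['somekey'] = None                   # ignored: None values are skipped unless allow_none=True
print(sds.json_value())                 # -> {"foo": {"bar": {"baz": "hello"}}}

pd = JSONPathDict(sds.dict_value())
print(pd['foo.bar.baz'])                # -> 'hello'
print(pd['foo.bar']['baz'])             # nested dicts come back wrapped in JSONPathDict
print(pd.get('missing.key', not_found_handler=dict))   # -> {} from the fallback handler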
@@ -1,44 +1,50 @@
 import logging

-from data.database import ImageStorage, Image, db
+from data.database import ImageStorage, Image, db, db_for_update
 from app import app

-LOGGER = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)


 def backfill_aggregate_sizes():
   """ Generates aggregate sizes for any image storage entries without them """
-  LOGGER.setLevel(logging.DEBUG)
-  LOGGER.debug('Aggregate sizes backfill: Began execution')
+  logger.debug('Aggregate sizes backfill: Began execution')
   while True:
-    batch_storage_ids = list(ImageStorage
-                             .select(ImageStorage.id)
-                             .where(ImageStorage.aggregate_size >> None)
-                             .limit(10))
+    batch_image_ids = list(Image
+                           .select(Image.id)
+                           .where(Image.aggregate_size >> None)
+                           .limit(100))

-    if len(batch_storage_ids) == 0:
+    if len(batch_image_ids) == 0:
       # There are no storages left to backfill. We're done!
-      LOGGER.debug('Aggregate sizes backfill: Backfill completed')
+      logger.debug('Aggregate sizes backfill: Backfill completed')
       return

-    LOGGER.debug('Aggregate sizes backfill: Found %s records to update', len(batch_storage_ids))
-    for image_storage_id in batch_storage_ids:
-      LOGGER.debug('Updating image storage: %s', image_storage_id.id)
+    logger.debug('Aggregate sizes backfill: Found %s records to update', len(batch_image_ids))
+    for image_id in batch_image_ids:
+      logger.debug('Updating image : %s', image_id.id)

       with app.config['DB_TRANSACTION_FACTORY'](db):
         try:
-          storage = ImageStorage.select().where(ImageStorage.id == image_storage_id.id).get()
-          image = Image.select().where(Image.storage == storage).get()
+          image = (Image
+                   .select(Image, ImageStorage)
+                   .join(ImageStorage)
+                   .where(Image.id == image_id)
+                   .get())
+
+          aggregate_size = image.storage.image_size

           image_ids = image.ancestors.split('/')[1:-1]
-          aggregate_size = storage.image_size
           for image_id in image_ids:
-            current_image = Image.select().where(Image.id == image_id).join(ImageStorage)
-            aggregate_size += image.storage.image_size
+            to_add = db_for_update(Image
+                                   .select(Image, ImageStorage)
+                                   .join(ImageStorage)
+                                   .where(Image.id == image_id)).get()
+            aggregate_size += to_add.storage.image_size

-          storage.aggregate_size = aggregate_size
-          storage.save()
-        except ImageStorage.DoesNotExist:
+          image.aggregate_size = aggregate_size
+          image.save()
+        except Image.DoesNotExist:
           pass
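
The aggregate size computed above is the image's own storage size plus the size of every ancestor listed in the '/'-separated ancestors string (e.g. '/1/2/6/7/10/'). A standalone illustration with made-up sizes, not part of the diff:

sizes = {'1': 100, '2': 250, '6': 75, '7': 0, '10': 30, '11': 60}

def compute_aggregate_size(own_id, ancestors):
  ancestor_ids = ancestors.split('/')[1:-1]      # the empty entries at both ends are dropped
  return sizes[own_id] + sum(sizes[image_id] for image_id in ancestor_ids)

print(compute_aggregate_size('11', '/1/2/6/7/10/'))   # -> 60 + 100 + 250 + 75 + 0 + 30 = 515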
87 util/migrate/backfill_image_fields.py Normal file

@@ -0,0 +1,87 @@
import logging

from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
                    TextField)
from data.database import BaseModel, db, db_for_update
from app import app


logger = logging.getLogger(__name__)


class Repository(BaseModel):
  pass


# Vendor the information from tables we will be writing to at the time of this migration
class ImageStorage(BaseModel):
  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  uploading = BooleanField(default=True, null=True)


class Image(BaseModel):
  # This class is intentionally denormalized. Even though images are supposed
  # to be globally unique we can't treat them as such for permissions and
  # security reasons. So rather than Repository <-> Image being many to many
  # each image now belongs to exactly one repository.
  docker_image_id = CharField(index=True)
  repository = ForeignKeyField(Repository)

  # '/' separated list of ancestory ids, e.g. /1/2/6/7/10/
  ancestors = CharField(index=True, default='/', max_length=64535, null=True)

  storage = ForeignKeyField(ImageStorage, index=True, null=True)

  created = DateTimeField(null=True)
  comment = TextField(null=True)
  command = TextField(null=True)
  aggregate_size = BigIntegerField(null=True)
  v1_json_metadata = TextField(null=True)


def backfill_image_fields():
  """ Copies metadata from image storages to their images. """
  logger.debug('Image metadata backfill: Began execution')
  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .join(ImageStorage)
                           .where(Image.created >> None, Image.comment >> None,
                                  Image.command >> None, Image.aggregate_size >> None,
                                  ImageStorage.uploading == False,
                                  ~((ImageStorage.created >> None) &
                                    (ImageStorage.comment >> None) &
                                    (ImageStorage.command >> None) &
                                    (ImageStorage.aggregate_size >> None)))
                           .limit(100))

    if len(batch_image_ids) == 0:
      logger.debug('Image metadata backfill: Backfill completed')
      return

    logger.debug('Image metadata backfill: Found %s records to update', len(batch_image_ids))
    for image_id in batch_image_ids:
      logger.debug('Updating image: %s', image_id.id)

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          image = db_for_update(Image
                                .select(Image, ImageStorage)
                                .join(ImageStorage)
                                .where(Image.id == image_id.id)).get()

          image.created = image.storage.created
          image.comment = image.storage.comment
          image.command = image.storage.command
          image.aggregate_size = image.storage.aggregate_size
          image.save()
        except Image.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_image_fields()
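
The `Field >> None` comparisons in the query above are peewee's spelling of an SQL IS NULL test. A minimal standalone sketch with a throwaway SQLite model (not the vendored models above):

from peewee import SqliteDatabase, Model, CharField, DateTimeField

db = SqliteDatabase(':memory:')

class Image(Model):
  docker_image_id = CharField()
  created = DateTimeField(null=True)

  class Meta:
    database = db

db.create_tables([Image])
Image.create(docker_image_id='abc')                     # created is left NULL

query = Image.select().where(Image.created >> None)    # WHERE created IS NULL
print(query.count())                                    # -> 1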
72 util/migrate/backfill_v1_metadata.py Normal file

@@ -0,0 +1,72 @@
import logging

from peewee import JOIN_LEFT_OUTER

from data.database import (Image, ImageStorage, ImageStoragePlacement, ImageStorageLocation, db,
                           db_for_update)
from app import app, storage
from data import model


logger = logging.getLogger(__name__)


def image_json_path(storage_uuid):
  base_path = storage.image_path(storage_uuid)
  return '{0}json'.format(base_path)


def backfill_v1_metadata():
  """ Copies metadata from image storages to their images. """
  logger.debug('Image v1 metadata backfill: Began execution')
  while True:
    batch_image_ids = list(Image
                           .select(Image.id)
                           .join(ImageStorage)
                           .where(Image.v1_json_metadata >> None, ImageStorage.uploading == False)
                           .limit(100))

    if len(batch_image_ids) == 0:
      logger.debug('Image v1 metadata backfill: Backfill completed')
      return

    logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
    for one_id in batch_image_ids:
      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
          logger.debug('Loading image: %s', one_id.id)

          raw_query = (ImageStoragePlacement
                       .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
                       .join(ImageStorageLocation)
                       .switch(ImageStoragePlacement)
                       .join(ImageStorage, JOIN_LEFT_OUTER)
                       .join(Image)
                       .where(Image.id == one_id.id))

          placement_query = db_for_update(raw_query)

          repo_image_list = model.image.invert_placement_query_results(placement_query)
          if len(repo_image_list) > 1:
            logger.error('Found more images than we requested, something is wrong with the query')
            return

          repo_image = repo_image_list[0]
          uuid = repo_image.storage.uuid
          json_path = image_json_path(uuid)

          logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
          try:
            data = storage.get_content(repo_image.storage.locations, json_path)
          except IOError:
            data = None
            logger.exception('failed to find v1 metadata, defaulting to None')
          repo_image.v1_json_metadata = data
          repo_image.save()
        except ImageStoragePlacement.DoesNotExist:
          pass


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  # logging.getLogger('peewee').setLevel(logging.CRITICAL)
  backfill_v1_metadata()
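
A standalone sketch of the path construction and IOError fallback used above; the FakeStorage layout here is hypothetical, the real storage object comes from app:

class FakeStorage(object):
  def image_path(self, storage_uuid):
    return 'images/{0}/'.format(storage_uuid)        # assumed layout, for illustration only

  def get_content(self, locations, path):
    raise IOError('blob is missing')

store = FakeStorage()

def image_json_path(storage_uuid):
  base_path = store.image_path(storage_uuid)
  return '{0}json'.format(base_path)

json_path = image_json_path('some-uuid')             # -> 'images/some-uuid/json'
try:
  data = store.get_content(['local_us'], json_path)
except IOError:
  data = None                                        # the backfill stores None when the JSON cannot be read
print(json_path)
print(data)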
@@ -5,7 +5,7 @@ from app import app
 from data.database import configure, BaseModel, uuid_generator
 from peewee import *
 from bitbucket import BitBucket
-from endpoints.trigger import BitbucketBuildTrigger
+from buildtrigger.bitbuckethandler import BitbucketBuildTrigger

 configure(app.config)

@@ -4,7 +4,7 @@ import json

 from data.database import RepositoryBuildTrigger, BuildTriggerService, db, db_for_update
 from app import app
-from endpoints.trigger import BuildTriggerHandler
+from buildtrigger.basehandler import BuildTriggerHandler
 from util.security.ssh import generate_ssh_keypair
 from github import GithubException

@@ -24,7 +24,8 @@ def backfill_github_deploykeys():
                              .select(RepositoryBuildTrigger.id)
                              .where(RepositoryBuildTrigger.private_key >> None)
                              .where(RepositoryBuildTrigger.service == github_service)
-                             .limit(10))
+                             .where(RepositoryBuildTrigger.used_legacy_github >> None)
+                             .limit(100))

   filtered_ids = [trigger.id for trigger in build_trigger_ids if trigger.id not in encountered]
   if len(filtered_ids) == 0:
@@ -39,15 +40,22 @@ def backfill_github_deploykeys():

       with app.config['DB_TRANSACTION_FACTORY'](db):
         try:
-          query = RepositoryBuildTrigger.select(RepositoryBuildTrigger.id == trigger_id)
+          query = RepositoryBuildTrigger.select().where(RepositoryBuildTrigger.id == trigger_id)
           trigger = db_for_update(query).get()
         except RepositoryBuildTrigger.DoesNotExist:
           logger.debug('Could not find build trigger %s', trigger_id)
           continue

+        trigger.used_legacy_github = True
+        trigger.save()
+
         handler = BuildTriggerHandler.get_handler(trigger)

         config = handler.config
+        if not 'build_source' in config:
+          logger.debug('Could not find build source for trigger %s', trigger_id)
+          continue
+
         build_source = config['build_source']
         gh_client = handler._get_client()

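
A toy, in-memory sketch (no database, not part of the diff) of the control flow this hunk introduces: every selected trigger is flagged as handled up front, so a trigger without a build_source is skipped but never selected again:

triggers = [{'id': i, 'used_legacy_github': None, 'config': {'build_source': 'repo%d' % i}}
            for i in range(7)]
triggers[3]['config'] = {}              # one trigger without a build_source

encountered = set()
while True:
  batch = [t for t in triggers
           if t['used_legacy_github'] is None and t['id'] not in encountered][:3]
  if not batch:
    break
  for trigger in batch:
    encountered.add(trigger['id'])
    trigger['used_legacy_github'] = True             # mark as handled first
    if 'build_source' not in trigger['config']:
      continue                                       # skip triggers with no build source
    # ... deploy key work would happen here ...

print(sum(1 for t in triggers if t['used_legacy_github']))   # -> 7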
@@ -83,5 +91,8 @@ def backfill_github_deploykeys():


 if __name__ == "__main__":
   logging.getLogger('boto').setLevel(logging.CRITICAL)
+  logging.getLogger('github').setLevel(logging.CRITICAL)
+
+  logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
   backfill_github_deploykeys()

@@ -67,7 +67,7 @@ def backfill_sizes_from_data():
         decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

         uncompressed_size = 0
-        with store.stream_read_file(with_locs.locations, store.image_layer_path(uuid)) as stream:
+        with store.stream_read_file(with_locs.locations, store.v1_image_layer_path(uuid)) as stream:
           while True:
             current_data = stream.read(CHUNK_SIZE)
             if len(current_data) == 0:
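
A self-contained sketch of the streaming size computation around the changed line: decompress a gzip stream chunk by chunk and count the uncompressed bytes. The ZLIB_GZIP_WINDOW and CHUNK_SIZE values here are assumptions, not taken from the diff:

import gzip
import io
import zlib

ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32    # accept a gzip (or zlib) header automatically
CHUNK_SIZE = 5 * 1024 * 1024

def uncompressed_size_of(stream):
  decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
  uncompressed_size = 0
  while True:
    current_data = stream.read(CHUNK_SIZE)
    if len(current_data) == 0:
      break
    uncompressed_size += len(decompressor.decompress(current_data))
  return uncompressed_size

payload = b'x' * 100000
blob = io.BytesIO()
with gzip.GzipFile(fileobj=blob, mode='wb') as gz:
  gz.write(payload)
blob.seek(0)

print(uncompressed_size_of(blob))   # -> 100000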
@@ -6,6 +6,10 @@ from uuid import uuid4

 REPOSITORY_NAME_REGEX = re.compile(r'^[\.a-zA-Z0-9_-]+$')

+TAG_REGEX = re.compile(r'^[\w][\w\.-]{0,127}$')
+TAG_ERROR = ('Invalid tag: must match [A-Za-z0-9_.-], NOT start with "." or "-", '
+             'and can contain 1-128 characters')
+
 def parse_namespace_repository(repository, include_tag=False):
   parts = repository.rstrip('/').split('/', 1)
   if len(parts) < 2:
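
A quick check of what the new TAG_REGEX accepts and rejects (standalone, not part of the diff):

import re

TAG_REGEX = re.compile(r'^[\w][\w\.-]{0,127}$')

for tag in ['latest', 'v1.0.2', 'a' * 128, '.hidden', '-dash', 'a' * 129, '']:
  print('%r -> %s' % (tag[:12], bool(TAG_REGEX.match(tag))))
# 'latest', 'v1.0.2' and the 128-char name match; names starting with '.' or '-',
# names longer than 128 characters, and the empty string do not.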
@@ -6,7 +6,8 @@ import anunidecode
 INVALID_PASSWORD_MESSAGE = 'Invalid password, password must be at least ' + \
                            '8 characters and contain no whitespace.'
 INVALID_USERNAME_CHARACTERS = r'[^a-z0-9_]'
-VALID_CHARACTERS = '_' + string.digits + string.lowercase
+VALID_CHARACTERS = string.digits + string.lowercase
+
 MIN_LENGTH = 4
 MAX_LENGTH = 30

@@ -48,8 +49,13 @@ def _gen_filler_chars(num_filler_chars):


 def generate_valid_usernames(input_username):
+  # Docker's regex: [a-z0-9]+(?:[._-][a-z0-9]+)*
   normalized = input_username.encode('unidecode', 'ignore').strip().lower()
   prefix = re.sub(INVALID_USERNAME_CHARACTERS, '_', normalized)[:30]
+  prefix = re.sub(r'_{2,}', '_', prefix)
+
+  if prefix.endswith('_'):
+    prefix = prefix[0:len(prefix) - 1]

   num_filler_chars = max(0, MIN_LENGTH - len(prefix))

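
A standalone sketch of the new prefix normalization steps; the unidecode transliteration from the anunidecode package is skipped here and the input is assumed to be ASCII already:

import re

INVALID_USERNAME_CHARACTERS = r'[^a-z0-9_]'

def username_prefix(input_username):
  normalized = input_username.strip().lower()
  prefix = re.sub(INVALID_USERNAME_CHARACTERS, '_', normalized)[:30]
  prefix = re.sub(r'_{2,}', '_', prefix)          # collapse runs of underscores
  if prefix.endswith('_'):
    prefix = prefix[0:len(prefix) - 1]            # drop a trailing underscore
  return prefix

print(username_prefix('Jane Q. Public'))   # -> 'jane_q_public'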