Merge remote-tracking branch 'upstream/v2-phase4' into python-registry-v2

2015-10-22 16:59:28 -04:00 · 2015-10-22 16:59:28 -04:00 · e7a6176594
commit e7a6176594
parent 82e4adc6ca f2d4907528
105 changed files with 4439 additions and 2074 deletions
--- a/util/dict_wrappers.py
+++ b/util/dict_wrappers.py
@ -0,0 +1,76 @@
+import json
+from jsonpath_rw import parse
+
+class SafeDictSetter(object):
+  """ Specialized write-only dictionary wrapper class that allows for setting
+      nested keys via a path syntax.
+
+      Example:
+        sds = SafeDictSetter()
+        sds['foo.bar.baz'] = 'hello' # Sets 'foo' = {'bar': {'baz': 'hello'}}
+        sds['somekey'] = None # Does not set the key since the value is None
+  """
+  def __init__(self, initial_object=None):
+    """ Starts from initial_object when it is truthy, otherwise from a new empty dict. """
+    # NOTE(review): because of the `or`, an *empty* dict passed in is replaced by a fresh
+    # one, so later writes will not be visible through the caller's reference. A non-empty
+    # dict is kept and mutated in place by set().
+    self._object = initial_object or {}
+
+  def __setitem__(self, path, value):
+    # Item assignment always uses allow_none=False, so assigning None is a silent no-op.
+    self.set(path, value)
+
+  def set(self, path, value, allow_none=False):
+    """ Sets the value of the given path to the given value.
+
+        path is split on '.'; every piece but the last names a nested dict, which is
+        created if missing. A None value is ignored unless allow_none is True.
+        Raises Exception if an intermediate piece already holds a non-dict value.
+    """
+    if value is None and not allow_none:
+      return
+
+    pieces = path.split('.')
+    current = self._object
+
+    # Walk (and create as needed) every container on the way to the final key.
+    for piece in pieces[:len(pieces)-1]:
+      current_obj = current.get(piece, {})
+      if not isinstance(current_obj, dict):
+        raise Exception('Key %s is a non-object value: %s' % (piece, current_obj))
+
+      # Store the (possibly newly created) dict back so it becomes part of the tree.
+      current[piece] = current_obj
+      current = current_obj
+
+    # The last piece is the key that receives the value; an existing value is overwritten.
+    current[pieces[-1]] = value
+
+  def dict_value(self):
+    """ Returns the dict value built. """
+    # This is the underlying dict itself, not a copy.
+    return self._object
+
+  def json_value(self):
+    """ Returns the JSON string value of the dictionary built. """
+    return json.dumps(self._object)
+
+
+class JSONPathDict(object):
+  """ Specialized read-only dictionary wrapper class that uses the jsonpath_rw library
+      to access keys via an X-Path-like syntax.
+
+      Example:
+        pd = JSONPathDict({'hello': {'hi': 'there'}})
+        pd['hello.hi'] # Returns 'there'
+  """
+  def __init__(self, dict_value):
+    """ Init the helper with the JSON object.
+    """
+    self._object = dict_value
+
+  def __getitem__(self, path):
+    # Delegates to get() without a handler, so a missing path yields None rather than
+    # raising KeyError.
+    return self.get(path)
+
+  def get(self, path, not_found_handler=None):
+    """ Returns the value found at the given path. Path is a json-path expression.
+
+        Only the first match is considered. If there is no match, or the matched value is
+        falsy, the result of calling not_found_handler is returned (None when no handler
+        is given). A dict value is returned wrapped in a new JSONPathDict.
+    """
+    # The expression is parsed on every call; nothing is cached here.
+    jsonpath_expr = parse(path)
+    matches = jsonpath_expr.find(self._object)
+    if not matches:
+      return not_found_handler() if not_found_handler else None
+
+    match = matches[0].value
+    # NOTE(review): this truthiness test also reports present-but-falsy values
+    # (0, False, '', [], {}) as not found -- confirm that is intended.
+    if not match:
+      return not_found_handler() if not_found_handler else None
+
+    # Wrap nested objects so callers can keep using path lookups on the result.
+    if isinstance(match, dict):
+      return JSONPathDict(match)
+
+    return match
--- a/util/migrate/backfill_aggregate_sizes.py
+++ b/util/migrate/backfill_aggregate_sizes.py
@ -1,44 +1,50 @@
 import logging

-from data.database import ImageStorage, Image, db
+from data.database import ImageStorage, Image, db, db_for_update
 from app import app

-LOGGER = logging.getLogger(__name__)
+
+logger = logging.getLogger(__name__)
+

 def backfill_aggregate_sizes():
  """ Generates aggregate sizes for any image storage entries without them """
-  LOGGER.setLevel(logging.DEBUG)
-  LOGGER.debug('Aggregate sizes backfill: Began execution')
+  logger.debug('Aggregate sizes backfill: Began execution')
  while True:
-    batch_storage_ids = list(ImageStorage
-                          .select(ImageStorage.id)
-                          .where(ImageStorage.aggregate_size >> None)
-                          .limit(10))
+    batch_image_ids = list(Image
+                           .select(Image.id)
+                           .where(Image.aggregate_size >> None)
+                           .limit(100))

-    if len(batch_storage_ids) == 0:
+    if len(batch_image_ids) == 0:
      # There are no images left to backfill. We're done!
-      LOGGER.debug('Aggregate sizes backfill: Backfill completed')
+      logger.debug('Aggregate sizes backfill: Backfill completed')
      return

-    LOGGER.debug('Aggregate sizes backfill: Found %s records to update', len(batch_storage_ids))
-    for image_storage_id in batch_storage_ids:
-      LOGGER.debug('Updating image storage: %s', image_storage_id.id)
+    logger.debug('Aggregate sizes backfill: Found %s records to update', len(batch_image_ids))
+    for image_id in batch_image_ids:
+      logger.debug('Updating image : %s', image_id.id)

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
-          storage = ImageStorage.select().where(ImageStorage.id == image_storage_id.id).get()
-          image  = Image.select().where(Image.storage == storage).get()
+          image = (Image
+                   .select(Image, ImageStorage)
+                   .join(ImageStorage)
+                   .where(Image.id == image_id)
+                   .get())
+
+          aggregate_size = image.storage.image_size

          image_ids = image.ancestors.split('/')[1:-1]
-          aggregate_size = storage.image_size
          for image_id in image_ids:
-            current_image = Image.select().where(Image.id == image_id).join(ImageStorage)
-            aggregate_size += image.storage.image_size
+            to_add = db_for_update(Image
+                                   .select(Image, ImageStorage)
+                                   .join(ImageStorage)
+                                   .where(Image.id == image_id)).get()
+            aggregate_size += to_add.storage.image_size

-          storage.aggregate_size = aggregate_size
-          storage.save()
-        except ImageStorage.DoesNotExist:
-          pass
+          image.aggregate_size = aggregate_size
+          image.save()
        except Image.DoesNotExist:
          pass

--- a/util/migrate/backfill_image_fields.py
+++ b/util/migrate/backfill_image_fields.py
@ -0,0 +1,87 @@
+import logging
+
+from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
+                    TextField)
+from data.database import BaseModel, db, db_for_update
+from app import app
+
+
+logger = logging.getLogger(__name__)
+
+
+class Repository(BaseModel):
+  # Field-less stand-in model: it exists so Image.repository below has a foreign key target.
+  pass
+
+
+# Vendor the information from tables we will be writing to at the time of this migration
+class ImageStorage(BaseModel):
+  # Declares just the columns backfill_image_fields uses: the four values it copies onto
+  # the image, plus `uploading`, which it filters on.
+  created = DateTimeField(null=True)
+  comment = TextField(null=True)
+  command = TextField(null=True)
+  aggregate_size = BigIntegerField(null=True)
+  uploading = BooleanField(default=True, null=True)
+
+
+class Image(BaseModel):
+  # This class is intentionally denormalized. Even though images are supposed
+  # to be globally unique we can't treat them as such for permissions and
+  # security reasons. So rather than Repository <-> Image being many to many
+  # each image now belongs to exactly one repository.
+  docker_image_id = CharField(index=True)
+  repository = ForeignKeyField(Repository)
+
+  # '/' separated list of ancestor ids, e.g. /1/2/6/7/10/
+  ancestors = CharField(index=True, default='/', max_length=64535, null=True)
+
+  storage = ForeignKeyField(ImageStorage, index=True, null=True)
+
+  # The four columns below are the ones backfill_image_fields copies over from the
+  # image's ImageStorage row.
+  created = DateTimeField(null=True)
+  comment = TextField(null=True)
+  command = TextField(null=True)
+  aggregate_size = BigIntegerField(null=True)
+  # Declared on the model but not read or written by backfill_image_fields.
+  v1_json_metadata = TextField(null=True)
+
+
+def backfill_image_fields():
+  """ Copies metadata from image storages to their images.
+
+      Works in batches of up to 100 images and returns once a batch query finds nothing
+      left to update. Each image is updated inside its own transaction.
+  """
+  logger.debug('Image metadata backfill: Began execution')
+  while True:
+    # Select images that still have all four fields unset (`>> None` is peewee's IS NULL),
+    # whose storage is not uploading, and whose storage has at least one of the four
+    # values set. Presumably the last condition keeps images with nothing to copy from
+    # being selected again on every pass -- verify.
+    batch_image_ids = list(Image
+                           .select(Image.id)
+                           .join(ImageStorage)
+                           .where(Image.created >> None, Image.comment >> None,
+                                  Image.command >> None, Image.aggregate_size >> None,
+                                  ImageStorage.uploading == False,
+                                  ~((ImageStorage.created >> None) &
+                                    (ImageStorage.comment >> None) &
+                                    (ImageStorage.command >> None) &
+                                    (ImageStorage.aggregate_size >> None)))
+                           .limit(100))
+
+    if len(batch_image_ids) == 0:
+      logger.debug('Image metadata backfill: Backfill completed')
+      return
+
+    logger.debug('Image metadata backfill: Found %s records to update', len(batch_image_ids))
+    # Each entry is an Image row with only `id` selected, hence the `.id` accesses below.
+    for image_id in batch_image_ids:
+      logger.debug('Updating image: %s', image_id.id)
+
+      with app.config['DB_TRANSACTION_FACTORY'](db):
+        try:
+          # Re-read the image together with its storage inside the transaction.
+          # db_for_update is defined in data.database; presumably it adds row locking
+          # (SELECT ... FOR UPDATE) -- TODO confirm.
+          image = db_for_update(Image
+                                .select(Image, ImageStorage)
+                                .join(ImageStorage)
+                                .where(Image.id == image_id.id)).get()
+
+          image.created = image.storage.created
+          image.comment = image.storage.comment
+          image.command = image.storage.command
+          image.aggregate_size = image.storage.aggregate_size
+          image.save()
+        except Image.DoesNotExist:
+          # The row could not be re-read (e.g. removed since the batch query); skip it.
+          pass
+
+if __name__ == "__main__":
+  # Script entry point: emit DEBUG output from this module, but raise the peewee logger's
+  # threshold to CRITICAL so its messages are not printed.
+  logging.basicConfig(level=logging.DEBUG)
+  logging.getLogger('peewee').setLevel(logging.CRITICAL)
+  backfill_image_fields()
--- a/util/migrate/backfill_v1_metadata.py
+++ b/util/migrate/backfill_v1_metadata.py
@ -0,0 +1,72 @@
+import logging
+
+from peewee import JOIN_LEFT_OUTER
+
+from data.database import (Image, ImageStorage, ImageStoragePlacement, ImageStorageLocation, db,
+                           db_for_update)
+from app import app, storage
+from data import model
+
+
+logger = logging.getLogger(__name__)
+
+
+def image_json_path(storage_uuid):
+  """ Returns storage.image_path(storage_uuid) with 'json' appended. """
+  # No separator is inserted; assumes image_path() already ends with one -- TODO confirm.
+  base_path = storage.image_path(storage_uuid)
+  return '{0}json'.format(base_path)
+
+
+def backfill_v1_metadata():
+  """ Backfills Image.v1_json_metadata with the content of the 'json' file stored under
+      each image's storage path (see image_json_path), for images whose storage is not
+      uploading. Works in batches of up to 100 images, one transaction per image.
+  """
+  logger.debug('Image v1 metadata backfill: Began execution')
+  while True:
+    # Images still missing v1 metadata (`>> None` is peewee's IS NULL).
+    batch_image_ids = list(Image
+                           .select(Image.id)
+                           .join(ImageStorage)
+                           .where(Image.v1_json_metadata >> None, ImageStorage.uploading == False)
+                           .limit(100))
+
+    if len(batch_image_ids) == 0:
+      logger.debug('Image v1 metadata backfill: Backfill completed')
+      return
+
+    logger.debug('Image v1 metadata backfill: Found %s records to update', len(batch_image_ids))
+    for one_id in batch_image_ids:
+      with app.config['DB_TRANSACTION_FACTORY'](db):
+        try:
+          logger.debug('Loading image: %s', one_id.id)
+
+          # Query from the placement table, joining location, storage and image, so the
+          # image is loaded together with its storage and placement rows. The result shape
+          # expected by invert_placement_query_results is not visible here.
+          raw_query = (ImageStoragePlacement
+                       .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
+                       .join(ImageStorageLocation)
+                       .switch(ImageStoragePlacement)
+                       .join(ImageStorage, JOIN_LEFT_OUTER)
+                       .join(Image)
+                       .where(Image.id == one_id.id))
+
+          placement_query = db_for_update(raw_query)
+
+          repo_image_list = model.image.invert_placement_query_results(placement_query)
+          if len(repo_image_list) > 1:
+            # NOTE: this return ends the whole backfill, not just the current image.
+            logger.error('Found more images than we requested, something is wrong with the query')
+            return
+
+          # NOTE(review): if the helper can return an empty list (e.g. an image without
+          # placement rows), the index below raises IndexError, which the except clause
+          # at the bottom does not catch -- confirm.
+          repo_image = repo_image_list[0]
+          uuid = repo_image.storage.uuid
+          json_path = image_json_path(uuid)
+
+          logger.debug('Updating image: %s from: %s', repo_image.id, json_path)
+          try:
+            data = storage.get_content(repo_image.storage.locations, json_path)
+          except IOError:
+            # NOTE(review): saving None leaves v1_json_metadata NULL, so this image still
+            # matches the batch query above and will be picked up again on the next pass
+            # -- confirm this cannot keep the outer loop from terminating.
+            data = None
+            logger.exception('failed to find v1 metadata, defaulting to None')
+          repo_image.v1_json_metadata = data
+          repo_image.save()
+        except ImageStoragePlacement.DoesNotExist:
+          # NOTE(review): no .get() is called in the try body; presumably the helper can
+          # raise this when nothing matches -- verify.
+          pass
+
+if __name__ == "__main__":
+  # Script entry point: run the backfill with DEBUG logging. The peewee logger override
+  # below is commented out, so peewee's own log output is not suppressed here.
+  logging.basicConfig(level=logging.DEBUG)
+  # logging.getLogger('peewee').setLevel(logging.CRITICAL)
+  backfill_v1_metadata()
--- a/util/migrate/migratebitbucketservices.py
+++ b/util/migrate/migratebitbucketservices.py
@ -5,7 +5,7 @@ from app import app
 from data.database import configure, BaseModel, uuid_generator
 from peewee import *
 from bitbucket import BitBucket
-from endpoints.trigger import BitbucketBuildTrigger
+from buildtrigger.bitbuckethandler import BitbucketBuildTrigger

 configure(app.config)

--- a/util/migrate/migrategithubdeploykeys.py
+++ b/util/migrate/migrategithubdeploykeys.py
@ -4,7 +4,7 @@ import json

 from data.database import RepositoryBuildTrigger, BuildTriggerService, db, db_for_update
 from app import app
-from endpoints.trigger import BuildTriggerHandler
+from buildtrigger.basehandler import BuildTriggerHandler
 from util.security.ssh import generate_ssh_keypair
 from github import GithubException

@ -24,7 +24,8 @@ def backfill_github_deploykeys():
                              .select(RepositoryBuildTrigger.id)
                              .where(RepositoryBuildTrigger.private_key >> None)
                              .where(RepositoryBuildTrigger.service == github_service)
-                              .limit(10))
+                              .where(RepositoryBuildTrigger.used_legacy_github >> None)
+                              .limit(100))

    filtered_ids = [trigger.id for trigger in build_trigger_ids if trigger.id not in encountered]
    if len(filtered_ids) == 0:
@ -39,15 +40,22 @@ def backfill_github_deploykeys():

      with app.config['DB_TRANSACTION_FACTORY'](db):
        try:
-          query = RepositoryBuildTrigger.select(RepositoryBuildTrigger.id == trigger_id)
+          query = RepositoryBuildTrigger.select().where(RepositoryBuildTrigger.id == trigger_id)
          trigger = db_for_update(query).get()
        except RepositoryBuildTrigger.DoesNotExist:
          logger.debug('Could not find build trigger %s', trigger_id)
          continue

+        trigger.used_legacy_github = True
+        trigger.save()
+
        handler = BuildTriggerHandler.get_handler(trigger)

        config = handler.config
+        if not 'build_source' in config:
+          logger.debug('Could not find build source for trigger %s', trigger_id)
+          continue
+
        build_source = config['build_source']
        gh_client = handler._get_client()

@ -83,5 +91,8 @@ def backfill_github_deploykeys():


 if __name__ == "__main__":
+  logging.getLogger('boto').setLevel(logging.CRITICAL)
+  logging.getLogger('github').setLevel(logging.CRITICAL)
+
  logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False)
  backfill_github_deploykeys()
--- a/util/migrate/uncompressedsize.py
+++ b/util/migrate/uncompressedsize.py
@ -67,7 +67,7 @@ def backfill_sizes_from_data():
        decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

        uncompressed_size = 0
-        with store.stream_read_file(with_locs.locations, store.image_layer_path(uuid)) as stream:
+        with store.stream_read_file(with_locs.locations, store.v1_image_layer_path(uuid)) as stream:
          while True:
            current_data = stream.read(CHUNK_SIZE)
            if len(current_data) == 0:
--- a/util/names.py
+++ b/util/names.py
@ -6,6 +6,10 @@ from uuid import uuid4

 REPOSITORY_NAME_REGEX = re.compile(r'^[\.a-zA-Z0-9_-]+$')

+TAG_REGEX = re.compile(r'^[\w][\w\.-]{0,127}$')
+TAG_ERROR = ('Invalid tag: must match [A-Za-z0-9_.-], NOT start with "." or "-", '
+             'and can contain 1-128 characters')
+
 def parse_namespace_repository(repository, include_tag=False):
  parts = repository.rstrip('/').split('/', 1)
  if len(parts) < 2:
--- a/util/validation.py
+++ b/util/validation.py
@ -6,7 +6,8 @@ import anunidecode
 INVALID_PASSWORD_MESSAGE = 'Invalid password, password must be at least ' + \
                           '8 characters and contain no whitespace.'
 INVALID_USERNAME_CHARACTERS = r'[^a-z0-9_]'
-VALID_CHARACTERS = '_' + string.digits + string.lowercase
+VALID_CHARACTERS = string.digits + string.lowercase
+
 MIN_LENGTH = 4
 MAX_LENGTH = 30

@ -48,8 +49,13 @@ def _gen_filler_chars(num_filler_chars):


 def generate_valid_usernames(input_username):
+  # Docker's regex: [a-z0-9]+(?:[._-][a-z0-9]+)*
  normalized = input_username.encode('unidecode', 'ignore').strip().lower()
  prefix = re.sub(INVALID_USERNAME_CHARACTERS, '_', normalized)[:30]
+  prefix = re.sub(r'_{2,}', '_', prefix)
+
+  if prefix.endswith('_'):
+    prefix = prefix[0:len(prefix) - 1]

  num_filler_chars = max(0, MIN_LENGTH - len(prefix))