- Update the migrations tool to verify that migrations work up and down for both MySQL and PostgreSQL.

- Add migrations for the squashed image tables and for backfilling the uncompressed sizes
- Make sure the gzip stream uses a max length when determining the uncompressed size
Joseph Schorr 2014-10-07 15:29:56 -04:00
parent f38ce51943
commit f4daa5e97b
10 changed files with 152 additions and 43 deletions


@@ -168,7 +168,7 @@ class Visibility(BaseModel):
class Repository(BaseModel):
  namespace_user = ForeignKeyField(User)
  namespace_user = ForeignKeyField(User, null=True)
  name = CharField()
  visibility = ForeignKeyField(Visibility)
  description = TextField(null=True)


@@ -17,7 +17,12 @@ from util.morecollections import AttrDict
# access to the values within the .ini file in use.
db_uri = unquote(app.config['DB_URI'])
if 'GENMIGRATE' in os.environ:
  db_uri = 'mysql+pymysql://root:password@192.168.59.103/genschema'
  docker_host = os.environ.get('DOCKER_HOST')
  docker_host_ip = docker_host[len('tcp://'):].split(':')[0]
  if os.environ.get('GENMIGRATE') == 'mysql':
    db_uri = 'mysql+pymysql://root:password@%s/genschema' % (docker_host_ip)
  else:
    db_uri = 'postgresql://postgres@%s/genschema' % (docker_host_ip)
config = context.config
config.set_main_option('sqlalchemy.url', db_uri)
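For illustration, with a boot2docker-style value such as DOCKER_HOST=tcp://192.168.59.103:2376 (a hypothetical value; only the IP appears in the removed line above), the parsing reduces to:

  docker_host = 'tcp://192.168.59.103:2376'  # hypothetical DOCKER_HOST value
  docker_host_ip = docker_host[len('tcp://'):].split(':')[0]  # yields '192.168.59.103'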


@ -1,21 +0,0 @@
set -e
# Run a MySQL database on port 3306 inside of Docker.
docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql
# Sleep for 5s to get MySQL get started.
echo 'Sleeping for 5...'
sleep 5
# Add the database to mysql.
docker run --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword'
# Generate a SQLite database with the schema as defined by the existing alembic model.
GENMIGRATE=true PYTHONPATH=. alembic upgrade head
# Generate the migration to the current model.
GENMIGRATE=true PYTHONPATH=. alembic revision --autogenerate -m "$@"
# Kill the MySQL instance.
docker kill mysql
docker rm mysql

data/migrations/migration.sh (new executable file, 82 additions)

@@ -0,0 +1,82 @@
set -e

up_mysql() {
  # Run a MySQL database on port 3306 inside of Docker.
  docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql

  # Sleep for 5s to let MySQL get started.
  echo 'Sleeping for 5...'
  sleep 5

  # Add the genschema database to MySQL.
  docker run --rm --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword'
}

down_mysql() {
  docker kill mysql
  docker rm mysql
}

up_postgres() {
  # Run a Postgres database on port 5432 inside of Docker.
  docker run --name postgres -p 5432:5432 -d postgres

  # Sleep for 5s to let Postgres get started.
  echo 'Sleeping for 5...'
  sleep 5

  # Add the genschema database to Postgres.
  docker run --rm --link postgres:postgres postgres sh -c 'echo "create database genschema" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres'
}

down_postgres() {
  docker kill postgres
  docker rm postgres
}

gen_migrate() {
  # Generate a new migration for the current model.
  GENMIGRATE=$1 PYTHONPATH=. alembic revision --autogenerate -m "$2"

  # Upgrade the database to the current schema, including the new migration.
  GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head
}

test_migrate() {
  # Upgrade the database to the latest schema defined by the migrations.
  GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head

  # Downgrade to verify it works in both directions.
  COUNT=`ls data/migrations/versions/*.py | wc -l | tr -d ' '`
  GENMIGRATE=$1 PYTHONPATH=. alembic downgrade "-$COUNT"
}

# Test (and generate, if requested) via MySQL.
echo '> Starting MySQL'
up_mysql

if [ -n "$*" ]
then
  set +e
  echo '> Generating Migration'
  gen_migrate "mysql" "$*"
  set -e
fi

echo '> Testing Migration (mysql)'
set +e
test_migrate "mysql"
set -e
down_mysql

# Test via Postgres.
echo '> Starting Postgres'
up_postgres

echo '> Testing Migration (postgres)'
set +e
test_migrate "postgres"
set -e
down_postgres
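A hypothetical invocation, assuming the script is run from the repository root with a reachable Docker daemon (e.g. via boot2docker's DOCKER_HOST); the revision message below is illustrative:

  # Test the existing migrations up and down against both MySQL and Postgres:
  ./data/migrations/migration.sh

  # Autogenerate a new revision from the model changes first, then test it:
  ./data/migrations/migration.sh "add squashed image tables"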


@@ -0,0 +1,22 @@
"""Calculate uncompressed sizes for all images

Revision ID: 2430f55c41d5
Revises: 3b4d3a4461dc
Create Date: 2014-10-07 14:50:04.660315

"""

# revision identifiers, used by Alembic.
revision = '2430f55c41d5'
down_revision = '3b4d3a4461dc'

from alembic import op
import sqlalchemy as sa

from util.uncompressedsize import backfill_sizes_from_data


def upgrade(tables):
  backfill_sizes_from_data()


def downgrade(tables):
  pass


@@ -1,25 +1,24 @@
"""add support for squashing images
"""Add support for squashed images

Revision ID: 3f6d26399bd2
Revises: 34fd69f63809
Create Date: 2014-09-22 14:37:30.821785
Revision ID: 3b4d3a4461dc
Revises: b1d41e2071b
Create Date: 2014-10-07 14:49:13.105746

"""

# revision identifiers, used by Alembic.
revision = '3f6d26399bd2'
down_revision = '34fd69f63809'
revision = '3b4d3a4461dc'
down_revision = 'b1d41e2071b'

from alembic import op
import sqlalchemy as sa

def upgrade(tables):
  ### commands auto generated by Alembic - please adjust! ###
  op.create_table('imagestoragetransformation',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('name', sa.String(length=255), nullable=False),
    sa.PrimaryKeyConstraint('id')
    sa.PrimaryKeyConstraint('id', name=op.f('pk_imagestoragetransformation'))
  )
  op.create_index('imagestoragetransformation_name', 'imagestoragetransformation', ['name'], unique=True)
  op.create_table('derivedimagestorage',
@@ -27,10 +26,10 @@ def upgrade(tables):
    sa.Column('source_id', sa.Integer(), nullable=True),
    sa.Column('derivative_id', sa.Integer(), nullable=False),
    sa.Column('transformation_id', sa.Integer(), nullable=False),
    sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], ),
    sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], ),
    sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], ),
    sa.PrimaryKeyConstraint('id')
    sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_derivative_id_imagestorage')),
    sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_source_id_imagestorage')),
    sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], name=op.f('fk_dis_transformation_id_ist')),
    sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimagestorage'))
  )
  op.create_index('derivedimagestorage_derivative_id', 'derivedimagestorage', ['derivative_id'], unique=False)
  op.create_index('derivedimagestorage_source_id', 'derivedimagestorage', ['source_id'], unique=False)
@@ -38,18 +37,21 @@ def upgrade(tables):
  op.create_index('derivedimagestorage_transformation_id', 'derivedimagestorage', ['transformation_id'], unique=False)
  op.drop_index('image_repository_id_docker_image_id', table_name='image')
  op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=True)
  op.drop_index('imagestorage_uuid', table_name='imagestorage')
  op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=False)
  op.drop_column(u'repository', 'namespace')
  op.create_index('repository_namespace_user_id', 'repository', ['namespace_user_id'], unique=False)
  ### end Alembic commands ###

def downgrade(tables):
  ### commands auto generated by Alembic - please adjust! ###
  op.drop_index('repository_namespace_user_id', table_name='repository')
  op.add_column(u'repository', sa.Column('namespace', sa.String(length=255), nullable=True))
  op.drop_index('imagestorage_uuid', table_name='imagestorage')
  op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=True)
  op.drop_index('image_repository_id_docker_image_id', table_name='image')
  op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=False)
  op.drop_index('derivedimagestorage_transformation_id', table_name='derivedimagestorage')
  op.drop_index('derivedimagestorage_source_id_transformation_id', table_name='derivedimagestorage')
  op.drop_index('derivedimagestorage_source_id', table_name='derivedimagestorage')
  op.drop_index('derivedimagestorage_derivative_id', table_name='derivedimagestorage')
  op.drop_table('derivedimagestorage')
  op.drop_index('imagestoragetransformation_name', table_name='imagestoragetransformation')
  op.drop_table('imagestoragetransformation')
  ### end Alembic commands ###
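The op.f() calls mark these constraint names as already final, so Alembic applies them verbatim instead of running them through a naming convention. A convention along these lines would produce most of the pk_/fk_ names above (a sketch only; the project's actual configuration is not shown in this diff, and fk_dis_transformation_id_ist was presumably abbreviated by hand to fit MySQL's 64-character identifier limit):

  naming_convention = {
    'pk': 'pk_%(table_name)s',
    'fk': 'fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s',
  }
  target_metadata = sa.MetaData(naming_convention=naming_convention)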


@@ -13,7 +13,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
                           Notification, ImageStorageLocation, ImageStoragePlacement,
                           ExternalNotificationEvent, ExternalNotificationMethod,
                           RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite,
                           random_string_generator, db, BUILD_PHASE)
                           DerivedImageStorage, random_string_generator, db, BUILD_PHASE)
from peewee import JOIN_LEFT_OUTER, fn
from util.validation import (validate_username, validate_email, validate_password,
                             INVALID_PASSWORD_MESSAGE)

Binary file not shown.


@@ -9,6 +9,8 @@ import zlib
# http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32

CHUNK_SIZE = 5 * 1024 * 1024

class SizeInfo(object):
  def __init__(self):
    self.size = 0
@@ -23,6 +25,11 @@ def calculate_size_handler():
  decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

  def fn(buf):
    size_info.size += len(decompressor.decompress(buf))
    # Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
    # memory. As a result, we have to loop until the unconsumed tail is empty.
    current_data = buf
    while len(current_data) > 0:
      size_info.size += len(decompressor.decompress(current_data, CHUNK_SIZE))
      current_data = decompressor.unconsumed_tail

  return size_info, fn
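The same bounded-decompression pattern as a minimal standalone sketch (assuming gzip-compressed input bytes; the function name and constant below are illustrative, not from the repo). Passing a max_length to decompress() caps how much output any single call may produce; input that did not fit is returned via unconsumed_tail and fed back in on the next iteration:

  import zlib

  ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32  # accept gzip or zlib headers
  MAX_OUTPUT_PER_CALL = 5 * 1024 * 1024   # cap memory used by each decompress() call

  def uncompressed_length(compressed_bytes):
    # Count the uncompressed size without holding more than
    # MAX_OUTPUT_PER_CALL of decompressed data in memory at once.
    decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
    total = 0
    data = compressed_bytes
    while data:
      total += len(decompressor.decompress(data, MAX_OUTPUT_PER_CALL))
      data = decompressor.unconsumed_tail
    return total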


@@ -1,5 +1,6 @@
import logging
import zlib
import sys

from data import model
from data.database import ImageStorage
@@ -15,6 +16,15 @@ CHUNK_SIZE = 5 * 1024 * 1024

def backfill_sizes_from_data():
  logger.setLevel(logging.DEBUG)
  logger.debug('Starting uncompressed image size backfill')

  formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

  ch = logging.StreamHandler(sys.stdout)
  ch.setFormatter(formatter)
  logger.addHandler(ch)

  while True:
    # Load the record from the DB.
    batch_ids = list(ImageStorage
@@ -47,7 +57,9 @@ def backfill_sizes_from_data():
        if len(current_data) == 0:
          break

        uncompressed_size += len(decompressor.decompress(current_data))
        while current_data:
          uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
          current_data = decompressor.unconsumed_tail

      # Write the size to the image storage. We do so under a transaction AFTER checking to
      # make sure the image storage still exists and has not changed.
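A hedged sketch of that conditional-write pattern in peewee (illustrative only; the commit's actual code is truncated here, and the uncompressed_size column name is an assumption):

  from data.database import ImageStorage, db

  def store_uncompressed_size(storage_id, uncompressed_size):
    with db.transaction():
      # Re-load the record to confirm it still exists and is still unsized.
      try:
        record = ImageStorage.get(ImageStorage.id == storage_id)
      except ImageStorage.DoesNotExist:
        return

      if record.uncompressed_size is None:  # assumed column on ImageStorage
        record.uncompressed_size = uncompressed_size
        record.save()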