- Update the migrations tool to verify migrations work up and down for both MySQL and PostgreSQL.

- Add migrations for the squashed image tables and for backfilling the uncompressed sizes
- Make sure gzip stream uses a max length when determining the uncompressed size
Joseph Schorr 2014-10-07 15:29:56 -04:00
parent f38ce51943
commit f4daa5e97b
10 changed files with 152 additions and 43 deletions


@@ -168,7 +168,7 @@ class Visibility(BaseModel):
 class Repository(BaseModel):
-  namespace_user = ForeignKeyField(User)
+  namespace_user = ForeignKeyField(User, null=True)
   name = CharField()
   visibility = ForeignKeyField(Visibility)
   description = TextField(null=True)


@@ -17,7 +17,12 @@ from util.morecollections import AttrDict
 # access to the values within the .ini file in use.
 db_uri = unquote(app.config['DB_URI'])
 if 'GENMIGRATE' in os.environ:
-  db_uri = 'mysql+pymysql://root:password@192.168.59.103/genschema'
+  docker_host = os.environ.get('DOCKER_HOST')
+  docker_host_ip = docker_host[len('tcp://'):].split(':')[0]
+  if os.environ.get('GENMIGRATE') == 'mysql':
+    db_uri = 'mysql+pymysql://root:password@%s/genschema' % (docker_host_ip)
+  else:
+    db_uri = 'postgresql://postgres@%s/genschema' % (docker_host_ip)

 config = context.config
 config.set_main_option('sqlalchemy.url', db_uri)
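For reference, a DOCKER_HOST value exported by boot2docker-style setups looks like tcp://<ip>:<port>, so stripping the tcp:// prefix and splitting on ':' yields the host IP that replaces the previously hardcoded address. A minimal, standalone sketch of that parsing (the sample value is illustrative, not taken from this commit):

  import os

  # Hypothetical value; boot2docker typically exports something of this shape.
  os.environ['DOCKER_HOST'] = 'tcp://192.168.59.103:2376'

  docker_host = os.environ.get('DOCKER_HOST')
  docker_host_ip = docker_host[len('tcp://'):].split(':')[0]

  print('mysql+pymysql://root:password@%s/genschema' % (docker_host_ip))
  # -> mysql+pymysql://root:password@192.168.59.103/genschema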


@@ -1,21 +0,0 @@
set -e
# Run a MySQL database on port 3306 inside of Docker.
docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql
# Sleep for 5s to let MySQL get started.
echo 'Sleeping for 5...'
sleep 5
# Add the database to mysql.
docker run --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword'
# Generate a SQLite database with the schema as defined by the existing alembic model.
GENMIGRATE=true PYTHONPATH=. alembic upgrade head
# Generate the migration to the current model.
GENMIGRATE=true PYTHONPATH=. alembic revision --autogenerate -m "$@"
# Kill the MySQL instance.
docker kill mysql
docker rm mysql

data/migrations/migration.sh (new executable file, 82 lines)

@@ -0,0 +1,82 @@
set -e

up_mysql() {
  # Run a MySQL database on port 3306 inside of Docker.
  docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql

  # Sleep for 5s to let MySQL get started.
  echo 'Sleeping for 5...'
  sleep 5

  # Add the database to mysql.
  docker run --rm --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword'
}

down_mysql() {
  docker kill mysql
  docker rm mysql
}

up_postgres() {
  # Run a Postgres database on port 5432 inside of Docker.
  docker run --name postgres -p 5432:5432 -d postgres

  # Sleep for 5s to let Postgres get started.
  echo 'Sleeping for 5...'
  sleep 5

  # Add the database to postgres.
  docker run --rm --link postgres:postgres postgres sh -c 'echo "create database genschema" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres'
}

down_postgres() {
  docker kill postgres
  docker rm postgres
}

gen_migrate() {
  # Generate the migration to the current model.
  GENMIGRATE=$1 PYTHONPATH=. alembic revision --autogenerate -m "$@"

  # Bring the database up to the schema defined by the alembic migrations.
  GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head
}

test_migrate() {
  # Bring the database up to the schema defined by the alembic migrations.
  GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head

  # Downgrade to verify it works in both directions.
  COUNT=`ls data/migrations/versions/*.py | wc -l | tr -d ' '`
  GENMIGRATE=$1 PYTHONPATH=. alembic downgrade "-$COUNT"
}

# Test (and generate, if requested) via MySQL.
echo '> Starting MySQL'
up_mysql

if [ ! -z "$@" ]
then
  set +e
  echo '> Generating Migration'
  gen_migrate "mysql"
  set -e
fi

echo '> Testing Migration (mysql)'
set +e
test_migrate "mysql"
set -e
down_mysql

# Test via Postgres.
echo '> Starting Postgres'
up_postgres

echo '> Testing Migration (postgres)'
set +e
test_migrate "postgres"
set -e
down_postgres
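Usage note: with no arguments the script only verifies the existing migrations, upgrading to head and then downgrading all the way back down, first against MySQL and then against Postgres; given a message argument (for example ./data/migrations/migration.sh "add some table", an illustrative message), it also autogenerates a new revision against MySQL before testing. Both modes assume a running Docker daemon and a DOCKER_HOST of the form tcp://<ip>:<port>, since env.py derives the database host from it whenever GENMIGRATE is set.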


@@ -0,0 +1,22 @@
"""Calculate uncompressed sizes for all images

Revision ID: 2430f55c41d5
Revises: 3b4d3a4461dc
Create Date: 2014-10-07 14:50:04.660315

"""

# revision identifiers, used by Alembic.
revision = '2430f55c41d5'
down_revision = '3b4d3a4461dc'

from alembic import op
import sqlalchemy as sa

from util.uncompressedsize import backfill_sizes_from_data


def upgrade(tables):
  backfill_sizes_from_data()


def downgrade(tables):
  pass


@@ -1,25 +1,24 @@
-"""add support for squashing images
+"""Add support for squashed images

-Revision ID: 3f6d26399bd2
-Revises: 34fd69f63809
-Create Date: 2014-09-22 14:37:30.821785
+Revision ID: 3b4d3a4461dc
+Revises: b1d41e2071b
+Create Date: 2014-10-07 14:49:13.105746

 """

 # revision identifiers, used by Alembic.
-revision = '3f6d26399bd2'
-down_revision = '34fd69f63809'
+revision = '3b4d3a4461dc'
+down_revision = 'b1d41e2071b'

 from alembic import op
 import sqlalchemy as sa

 def upgrade(tables):
   ### commands auto generated by Alembic - please adjust! ###
   op.create_table('imagestoragetransformation',
     sa.Column('id', sa.Integer(), nullable=False),
     sa.Column('name', sa.String(length=255), nullable=False),
-    sa.PrimaryKeyConstraint('id')
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_imagestoragetransformation'))
   )
   op.create_index('imagestoragetransformation_name', 'imagestoragetransformation', ['name'], unique=True)
   op.create_table('derivedimagestorage',
@@ -27,10 +26,10 @@ def upgrade(tables):
     sa.Column('source_id', sa.Integer(), nullable=True),
     sa.Column('derivative_id', sa.Integer(), nullable=False),
     sa.Column('transformation_id', sa.Integer(), nullable=False),
-    sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], ),
-    sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], ),
-    sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], ),
-    sa.PrimaryKeyConstraint('id')
+    sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_derivative_id_imagestorage')),
+    sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_source_id_imagestorage')),
+    sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], name=op.f('fk_dis_transformation_id_ist')),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimagestorage'))
   )
   op.create_index('derivedimagestorage_derivative_id', 'derivedimagestorage', ['derivative_id'], unique=False)
   op.create_index('derivedimagestorage_source_id', 'derivedimagestorage', ['source_id'], unique=False)
@@ -38,18 +37,21 @@ def upgrade(tables):
   op.create_index('derivedimagestorage_transformation_id', 'derivedimagestorage', ['transformation_id'], unique=False)
   op.drop_index('image_repository_id_docker_image_id', table_name='image')
   op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=True)
+  op.drop_index('imagestorage_uuid', table_name='imagestorage')
+  op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=False)
+  op.drop_column(u'repository', 'namespace')
+  op.create_index('repository_namespace_user_id', 'repository', ['namespace_user_id'], unique=False)
   ### end Alembic commands ###

 def downgrade(tables):
   ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('repository_namespace_user_id', table_name='repository')
+  op.add_column(u'repository', sa.Column('namespace', sa.String(length=255), nullable=True))
+  op.drop_index('imagestorage_uuid', table_name='imagestorage')
+  op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=True)
   op.drop_index('image_repository_id_docker_image_id', table_name='image')
   op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=False)
   op.drop_index('derivedimagestorage_transformation_id', table_name='derivedimagestorage')
   op.drop_index('derivedimagestorage_source_id_transformation_id', table_name='derivedimagestorage')
   op.drop_index('derivedimagestorage_source_id', table_name='derivedimagestorage')
   op.drop_index('derivedimagestorage_derivative_id', table_name='derivedimagestorage')
   op.drop_table('derivedimagestorage')
   op.drop_index('imagestoragetransformation_name', table_name='imagestoragetransformation')
   op.drop_table('imagestoragetransformation')
   ### end Alembic commands ###


@@ -13,7 +13,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
                             Notification, ImageStorageLocation, ImageStoragePlacement,
                             ExternalNotificationEvent, ExternalNotificationMethod,
                             RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite,
-                            random_string_generator, db, BUILD_PHASE)
+                            DerivedImageStorage, random_string_generator, db, BUILD_PHASE)
 from peewee import JOIN_LEFT_OUTER, fn
 from util.validation import (validate_username, validate_email, validate_password,
                              INVALID_PASSWORD_MESSAGE)

Binary file not shown.


@@ -9,6 +9,8 @@ import zlib
 # http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
 ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32

+CHUNK_SIZE = 5 * 1024 * 1024
+
 class SizeInfo(object):
   def __init__(self):
     self.size = 0
@@ -23,6 +25,11 @@ def calculate_size_handler():
   decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

   def fn(buf):
-    size_info.size += len(decompressor.decompress(buf))
+    # Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
+    # memory. As a result, we have to loop until the unconsumed tail is empty.
+    current_data = buf
+    while len(current_data) > 0:
+      size_info.size += len(decompressor.decompress(current_data, CHUNK_SIZE))
+      current_data = decompressor.unconsumed_tail

   return size_info, fn
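For reference, here is a standalone sketch of the bounded-decompression pattern used above (the payload and sizes are illustrative, not taken from this commit): decompress() accepts an optional max_length, and any input it did not consume is handed back through unconsumed_tail, so looping until the tail is empty caps the memory used per call while still counting every uncompressed byte.

  import zlib

  ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32   # auto-detect gzip or zlib headers
  CHUNK_SIZE = 5 * 1024 * 1024             # cap the output produced per decompress() call

  # Build a gzip-framed sample payload to measure (10 MiB of 'x' before compression).
  compressor = zlib.compressobj(6, zlib.DEFLATED, zlib.MAX_WBITS | 16)
  payload = compressor.compress(b'x' * (10 * 1024 * 1024)) + compressor.flush()

  decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
  uncompressed_size = 0
  current_data = payload
  while len(current_data) > 0:
    uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
    current_data = decompressor.unconsumed_tail

  print(uncompressed_size)  # -> 10485760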


@@ -1,5 +1,6 @@
 import logging
 import zlib
+import sys

 from data import model
 from data.database import ImageStorage
@@ -15,6 +16,15 @@ CHUNK_SIZE = 5 * 1024 * 1024

 def backfill_sizes_from_data():
+  logger.setLevel(logging.DEBUG)
+  logger.debug('Starting uncompressed image size backfill')
+
+  formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+  ch = logging.StreamHandler(sys.stdout)
+  ch.setFormatter(formatter)
+  logger.addHandler(ch)
+
   while True:
     # Load the record from the DB.
     batch_ids = list(ImageStorage
@@ -47,7 +57,9 @@ def backfill_sizes_from_data():
         if len(current_data) == 0:
           break

-        uncompressed_size += len(decompressor.decompress(current_data))
+        while current_data:
+          uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
+          current_data = decompressor.unconsumed_tail

       # Write the size to the image storage. We do so under a transaction AFTER checking to
       # make sure the image storage still exists and has not changed.