- Update the migrations tool to verify migrations work up and down for both MySQL and PostgreSQL.
- Add migrations for the squashed image tables and for backfilling the uncompressed sizes.
- Make sure the gzip stream uses a max length when determining the uncompressed size.
This commit is contained in:
		
							parent
							
								
									f38ce51943
								
							
						
					
					
						commit
						f4daa5e97b
					
				
					 10 changed files with 152 additions and 43 deletions
				
			
		|  | @ -168,7 +168,7 @@ class Visibility(BaseModel): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Repository(BaseModel): | class Repository(BaseModel): | ||||||
|   namespace_user = ForeignKeyField(User) |   namespace_user = ForeignKeyField(User, null=True) | ||||||
|   name = CharField() |   name = CharField() | ||||||
|   visibility = ForeignKeyField(Visibility) |   visibility = ForeignKeyField(Visibility) | ||||||
|   description = TextField(null=True) |   description = TextField(null=True) | ||||||
|  |  | ||||||
|  | @ -17,7 +17,12 @@ from util.morecollections import AttrDict | ||||||
| # access to the values within the .ini file in use. | # access to the values within the .ini file in use. | ||||||
| db_uri = unquote(app.config['DB_URI']) | db_uri = unquote(app.config['DB_URI']) | ||||||
| if 'GENMIGRATE' in os.environ: | if 'GENMIGRATE' in os.environ: | ||||||
|     db_uri = 'mysql+pymysql://root:password@192.168.59.103/genschema' |     docker_host = os.environ.get('DOCKER_HOST') | ||||||
|  |     docker_host_ip = docker_host[len('tcp://'):].split(':')[0] | ||||||
|  |     if os.environ.get('GENMIGRATE') == 'mysql': | ||||||
|  |         db_uri = 'mysql+pymysql://root:password@%s/genschema' % (docker_host_ip) | ||||||
|  |     else: | ||||||
|  |         db_uri = 'postgresql://postgres@%s/genschema' % (docker_host_ip) | ||||||
| 
 | 
 | ||||||
| config = context.config | config = context.config | ||||||
| config.set_main_option('sqlalchemy.url', db_uri) | config.set_main_option('sqlalchemy.url', db_uri) | ||||||
|  |  | ||||||
|  | @ -1,21 +0,0 @@ | ||||||
| set -e  |  | ||||||
| 
 |  | ||||||
| # Run a MySQL database on port 3306 inside of Docker. |  | ||||||
| docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql |  | ||||||
| 
 |  | ||||||
| # Sleep for 5s to get MySQL get started. |  | ||||||
| echo 'Sleeping for 5...' |  | ||||||
| sleep 5 |  | ||||||
| 
 |  | ||||||
| # Add the database to mysql. |  | ||||||
| docker run --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword' |  | ||||||
| 
 |  | ||||||
| # Generate a SQLite database with the schema as defined by the existing alembic model. |  | ||||||
| GENMIGRATE=true PYTHONPATH=. alembic upgrade head |  | ||||||
| 
 |  | ||||||
| # Generate the migration to the current model. |  | ||||||
| GENMIGRATE=true PYTHONPATH=. alembic revision --autogenerate -m "$@" |  | ||||||
| 
 |  | ||||||
| # Kill the MySQL instance. |  | ||||||
| docker kill mysql |  | ||||||
| docker rm mysql |  | ||||||
							
								
								
									
										82
									
								
								data/migrations/migration.sh
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										82
									
								
								data/migrations/migration.sh
									
										
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,82 @@ | ||||||
|  | set -e  | ||||||
|  | 
 | ||||||
|  | up_mysql() { | ||||||
|  |   # Run a SQL database on port 3306 inside of Docker. | ||||||
|  |   docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql | ||||||
|  | 
 | ||||||
|  |   # Sleep for 5s to let MySQL get started. | ||||||
|  |   echo 'Sleeping for 5...' | ||||||
|  |   sleep 5 | ||||||
|  | 
 | ||||||
|  |   # Add the database to mysql. | ||||||
|  |   docker run --rm --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword' | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | down_mysql() { | ||||||
|  |   docker kill mysql | ||||||
|  |   docker rm mysql | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | up_postgres() { | ||||||
|  |   # Run a SQL database on port 5432 inside of Docker. | ||||||
|  |   docker run --name postgres -p 5432:5432 -d postgres | ||||||
|  | 
 | ||||||
|  |   # Sleep for 5s to let Postgres get started. | ||||||
|  |   echo 'Sleeping for 5...' | ||||||
|  |   sleep 5 | ||||||
|  | 
 | ||||||
|  |   # Add the database to postgres. | ||||||
|  |   docker run --rm --link postgres:postgres postgres sh -c 'echo "create database genschema" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres' | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | down_postgres() { | ||||||
|  |   docker kill postgres | ||||||
|  |   docker rm postgres | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | gen_migrate() { | ||||||
|  |   # Generate the migration to the current model. | ||||||
|  |   GENMIGRATE=$1 PYTHONPATH=. alembic revision --autogenerate -m "$@" | ||||||
|  | 
 | ||||||
|  |   # Upgrade the target database (MySQL or Postgres, per GENMIGRATE) to the schema defined by the alembic migrations. | ||||||
|  |   GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | test_migrate() { | ||||||
|  |   # Upgrade the target database (MySQL or Postgres, per GENMIGRATE) to the schema defined by the alembic migrations. | ||||||
|  |   GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head | ||||||
|  | 
 | ||||||
|  |   # Downgrade to verify it works in both directions. | ||||||
|  |   COUNT=`ls data/migrations/versions/*.py | wc -l | tr -d ' '` | ||||||
|  |   GENMIGRATE=$1 PYTHONPATH=. alembic downgrade "-$COUNT" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Test (and generate, if requested) via MySQL. | ||||||
|  | echo '> Starting MySQL' | ||||||
|  | up_mysql | ||||||
|  | 
 | ||||||
|  | if [ ! -z "$@" ] | ||||||
|  |   then | ||||||
|  |     set +e | ||||||
|  |     echo '> Generating Migration' | ||||||
|  |     gen_migrate "mysql" | ||||||
|  |     set -e | ||||||
|  |   fi | ||||||
|  | 
 | ||||||
|  | echo '> Testing Migration (mysql)' | ||||||
|  | set +e | ||||||
|  | test_migrate "mysql" | ||||||
|  | set -e | ||||||
|  | down_mysql | ||||||
|  | 
 | ||||||
|  | # Test via Postgres. | ||||||
|  | echo '> Starting Postgres' | ||||||
|  | up_postgres | ||||||
|  | 
 | ||||||
|  | echo '> Testing Migration (postgres)' | ||||||
|  | set +e | ||||||
|  | test_migrate "postgres" | ||||||
|  | set -e | ||||||
|  | down_postgres | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @ -0,0 +1,22 @@ | ||||||
|  | """Calculate uncompressed sizes for all images | ||||||
|  | 
 | ||||||
|  | Revision ID: 2430f55c41d5 | ||||||
|  | Revises: 3b4d3a4461dc | ||||||
|  | Create Date: 2014-10-07 14:50:04.660315 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | # revision identifiers, used by Alembic. | ||||||
|  | revision = '2430f55c41d5' | ||||||
|  | down_revision = '3b4d3a4461dc' | ||||||
|  | 
 | ||||||
|  | from alembic import op | ||||||
|  | import sqlalchemy as sa | ||||||
|  | from util.uncompressedsize import backfill_sizes_from_data | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def upgrade(tables): | ||||||
|  |   backfill_sizes_from_data() | ||||||
|  | 
 | ||||||
|  | def downgrade(tables): | ||||||
|  |   pass | ||||||
|  | @ -1,25 +1,24 @@ | ||||||
| """add support for squashing images | """Add support for squashed images | ||||||
| 
 | 
 | ||||||
| Revision ID: 3f6d26399bd2 | Revision ID: 3b4d3a4461dc | ||||||
| Revises: 34fd69f63809 | Revises: b1d41e2071b | ||||||
| Create Date: 2014-09-22 14:37:30.821785 | Create Date: 2014-10-07 14:49:13.105746 | ||||||
| 
 | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| # revision identifiers, used by Alembic. | # revision identifiers, used by Alembic. | ||||||
| revision = '3f6d26399bd2' | revision = '3b4d3a4461dc' | ||||||
| down_revision = '34fd69f63809' | down_revision = 'b1d41e2071b' | ||||||
| 
 | 
 | ||||||
| from alembic import op | from alembic import op | ||||||
| import sqlalchemy as sa | import sqlalchemy as sa | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| def upgrade(tables): | def upgrade(tables): | ||||||
|     ### commands auto generated by Alembic - please adjust! ### |     ### commands auto generated by Alembic - please adjust! ### | ||||||
|     op.create_table('imagestoragetransformation', |     op.create_table('imagestoragetransformation', | ||||||
|     sa.Column('id', sa.Integer(), nullable=False), |     sa.Column('id', sa.Integer(), nullable=False), | ||||||
|     sa.Column('name', sa.String(length=255), nullable=False), |     sa.Column('name', sa.String(length=255), nullable=False), | ||||||
|     sa.PrimaryKeyConstraint('id') |     sa.PrimaryKeyConstraint('id', name=op.f('pk_imagestoragetransformation')) | ||||||
|     ) |     ) | ||||||
|     op.create_index('imagestoragetransformation_name', 'imagestoragetransformation', ['name'], unique=True) |     op.create_index('imagestoragetransformation_name', 'imagestoragetransformation', ['name'], unique=True) | ||||||
|     op.create_table('derivedimagestorage', |     op.create_table('derivedimagestorage', | ||||||
|  | @ -27,10 +26,10 @@ def upgrade(tables): | ||||||
|     sa.Column('source_id', sa.Integer(), nullable=True), |     sa.Column('source_id', sa.Integer(), nullable=True), | ||||||
|     sa.Column('derivative_id', sa.Integer(), nullable=False), |     sa.Column('derivative_id', sa.Integer(), nullable=False), | ||||||
|     sa.Column('transformation_id', sa.Integer(), nullable=False), |     sa.Column('transformation_id', sa.Integer(), nullable=False), | ||||||
|     sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], ), |     sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_derivative_id_imagestorage')), | ||||||
|     sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], ), |     sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_source_id_imagestorage')), | ||||||
|     sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], ), |     sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], name=op.f('fk_dis_transformation_id_ist')), | ||||||
|     sa.PrimaryKeyConstraint('id') |     sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimagestorage')) | ||||||
|     ) |     ) | ||||||
|     op.create_index('derivedimagestorage_derivative_id', 'derivedimagestorage', ['derivative_id'], unique=False) |     op.create_index('derivedimagestorage_derivative_id', 'derivedimagestorage', ['derivative_id'], unique=False) | ||||||
|     op.create_index('derivedimagestorage_source_id', 'derivedimagestorage', ['source_id'], unique=False) |     op.create_index('derivedimagestorage_source_id', 'derivedimagestorage', ['source_id'], unique=False) | ||||||
|  | @ -38,18 +37,21 @@ def upgrade(tables): | ||||||
|     op.create_index('derivedimagestorage_transformation_id', 'derivedimagestorage', ['transformation_id'], unique=False) |     op.create_index('derivedimagestorage_transformation_id', 'derivedimagestorage', ['transformation_id'], unique=False) | ||||||
|     op.drop_index('image_repository_id_docker_image_id', table_name='image') |     op.drop_index('image_repository_id_docker_image_id', table_name='image') | ||||||
|     op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=True) |     op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=True) | ||||||
|  |     op.drop_index('imagestorage_uuid', table_name='imagestorage') | ||||||
|  |     op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=False) | ||||||
|  |     op.drop_column(u'repository', 'namespace') | ||||||
|  |     op.create_index('repository_namespace_user_id', 'repository', ['namespace_user_id'], unique=False) | ||||||
|     ### end Alembic commands ### |     ### end Alembic commands ### | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def downgrade(tables): | def downgrade(tables): | ||||||
|     ### commands auto generated by Alembic - please adjust! ### |     ### commands auto generated by Alembic - please adjust! ### | ||||||
|  |     op.drop_index('repository_namespace_user_id', table_name='repository') | ||||||
|  |     op.add_column(u'repository', sa.Column('namespace', sa.String(length=255), nullable=True)) | ||||||
|  |     op.drop_index('imagestorage_uuid', table_name='imagestorage') | ||||||
|  |     op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=True) | ||||||
|     op.drop_index('image_repository_id_docker_image_id', table_name='image') |     op.drop_index('image_repository_id_docker_image_id', table_name='image') | ||||||
|     op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=False) |     op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=False) | ||||||
|     op.drop_index('derivedimagestorage_transformation_id', table_name='derivedimagestorage') |  | ||||||
|     op.drop_index('derivedimagestorage_source_id_transformation_id', table_name='derivedimagestorage') |  | ||||||
|     op.drop_index('derivedimagestorage_source_id', table_name='derivedimagestorage') |  | ||||||
|     op.drop_index('derivedimagestorage_derivative_id', table_name='derivedimagestorage') |  | ||||||
|     op.drop_table('derivedimagestorage') |     op.drop_table('derivedimagestorage') | ||||||
|     op.drop_index('imagestoragetransformation_name', table_name='imagestoragetransformation') |  | ||||||
|     op.drop_table('imagestoragetransformation') |     op.drop_table('imagestoragetransformation') | ||||||
|     ### end Alembic commands ### |     ### end Alembic commands ### | ||||||
|  | @ -13,7 +13,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor | ||||||
|                            Notification, ImageStorageLocation, ImageStoragePlacement, |                            Notification, ImageStorageLocation, ImageStoragePlacement, | ||||||
|                            ExternalNotificationEvent, ExternalNotificationMethod, |                            ExternalNotificationEvent, ExternalNotificationMethod, | ||||||
|                            RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite, |                            RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite, | ||||||
|                            random_string_generator, db, BUILD_PHASE) |                            DerivedImageStorage, random_string_generator, db, BUILD_PHASE) | ||||||
| from peewee import JOIN_LEFT_OUTER, fn | from peewee import JOIN_LEFT_OUTER, fn | ||||||
| from util.validation import (validate_username, validate_email, validate_password, | from util.validation import (validate_username, validate_email, validate_password, | ||||||
|                              INVALID_PASSWORD_MESSAGE) |                              INVALID_PASSWORD_MESSAGE) | ||||||
|  |  | ||||||
										
											Binary file not shown.
										
									
								
							|  | @ -9,6 +9,8 @@ import zlib | ||||||
| # http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760 | # http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760 | ||||||
| ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32 | ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32 | ||||||
| 
 | 
 | ||||||
|  | CHUNK_SIZE = 5 * 1024 * 1024 | ||||||
|  | 
 | ||||||
| class SizeInfo(object): | class SizeInfo(object): | ||||||
|   def __init__(self): |   def __init__(self): | ||||||
|     self.size = 0 |     self.size = 0 | ||||||
|  | @ -23,6 +25,11 @@ def calculate_size_handler(): | ||||||
|   decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW) |   decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW) | ||||||
| 
 | 
 | ||||||
|   def fn(buf): |   def fn(buf): | ||||||
|     size_info.size += len(decompressor.decompress(buf)) |     # Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much | ||||||
|  |     # memory. As a result, we have to loop until the unconsumed tail is empty. | ||||||
|  |     current_data = buf | ||||||
|  |     while len(current_data) > 0: | ||||||
|  |       size_info.size += len(decompressor.decompress(current_data, CHUNK_SIZE)) | ||||||
|  |       current_data = decompressor.unconsumed_tail | ||||||
| 
 | 
 | ||||||
|   return size_info, fn |   return size_info, fn | ||||||
|  |  | ||||||
|  | @ -1,5 +1,6 @@ | ||||||
| import logging | import logging | ||||||
| import zlib | import zlib | ||||||
|  | import sys | ||||||
| 
 | 
 | ||||||
| from data import model | from data import model | ||||||
| from data.database import ImageStorage | from data.database import ImageStorage | ||||||
|  | @ -15,6 +16,15 @@ CHUNK_SIZE = 5 * 1024 * 1024 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def backfill_sizes_from_data(): | def backfill_sizes_from_data(): | ||||||
|  |   logger.setLevel(logging.DEBUG) | ||||||
|  |   logger.debug('Starting uncompressed image size backfill') | ||||||
|  |    | ||||||
|  |   formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') | ||||||
|  |    | ||||||
|  |   ch = logging.StreamHandler(sys.stdout) | ||||||
|  |   ch.setFormatter(formatter) | ||||||
|  |   logger.addHandler(ch) | ||||||
|  | 
 | ||||||
|   while True: |   while True: | ||||||
|     # Load the record from the DB. |     # Load the record from the DB. | ||||||
|     batch_ids = list(ImageStorage |     batch_ids = list(ImageStorage | ||||||
|  | @ -47,7 +57,9 @@ def backfill_sizes_from_data(): | ||||||
|             if len(current_data) == 0: |             if len(current_data) == 0: | ||||||
|               break |               break | ||||||
| 
 | 
 | ||||||
|             uncompressed_size += len(decompressor.decompress(current_data)) |             while current_data: | ||||||
|  |               uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE)) | ||||||
|  |               current_data = decompressor.unconsumed_tail | ||||||
| 
 | 
 | ||||||
|         # Write the size to the image storage. We do so under a transaction AFTER checking to |         # Write the size to the image storage. We do so under a transaction AFTER checking to | ||||||
|         # make sure the image storage still exists and has not changed. |         # make sure the image storage still exists and has not changed. | ||||||
		Reference in a new issue