From 7d6b4b621fd1b7a83fd64d512708fca3de8d04c1 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Wed, 4 Nov 2015 14:50:56 -0500
Subject: [PATCH 01/25] Show an error if gunzip returns an empty buffer
---
static/js/services/datafile-service.js | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/static/js/services/datafile-service.js b/static/js/services/datafile-service.js
index 2471ac32f..76197a666 100644
--- a/static/js/services/datafile-service.js
+++ b/static/js/services/datafile-service.js
@@ -55,6 +55,11 @@ angular.module('quay').factory('DataFileService', [function() {
return;
}
+ if (plain.byteLength == 0) {
+ failure();
+ return;
+ }
+
dataFileService.tryAsTar_(plain, success, failure);
};
From 4d81567a0c23b13ad76ae2525e174c687ea5e0a6 Mon Sep 17 00:00:00 2001
From: Matt Jibson
Date: Wed, 4 Nov 2015 16:11:15 -0500
Subject: [PATCH 02/25] Remove error notification when a user deletes repos
Also prevent duplicate notifications of that type.
fixes #493
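For reference, a minimal sketch of the recheck that now runs after a repository is purged (it reuses the model calls and helpers shown in the diff; the wrapper function name is illustrative):

    def recheck_private_usage(namespace):
        user = model.user.get_namespace_user(namespace)
        plan = get_namespace_plan(namespace)  # None when there is no paid plan
        private_repos = model.user.get_private_repo_count(user.username)
        repos_allowed = plan['privateRepos'] if plan is not None else 0

        if private_repos > repos_allowed:
            # create_unique_notification avoids stacking duplicate warnings
            model.notification.create_unique_notification(
                'over_private_usage', user, {'namespace': user.username})
        else:
            # usage is back within the plan, so clear any lingering warning
            model.notification.delete_notifications_by_kind(user, 'over_private_usage')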
---
endpoints/api/billing.py | 21 +++++++++++++--------
endpoints/api/repository.py | 6 +++++-
endpoints/common.py | 7 +++++--
3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/endpoints/api/billing.py b/endpoints/api/billing.py
index aae577908..b6596054b 100644
--- a/endpoints/api/billing.py
+++ b/endpoints/api/billing.py
@@ -18,15 +18,14 @@ import features
import uuid
import json
-def lookup_allowed_private_repos(namespace):
- """ Returns false if the given namespace has used its allotment of private repositories. """
- # Lookup the namespace and verify it has a subscription.
+def get_namespace_plan(namespace):
+ """ Returns the plan of the given namespace. """
namespace_user = model.user.get_namespace_user(namespace)
if namespace_user is None:
- return False
+ return None
if not namespace_user.stripe_id:
- return False
+ return None
# Ask Stripe for the subscribed plan.
# TODO: Can we cache this or make it faster somehow?
@@ -36,14 +35,20 @@ def lookup_allowed_private_repos(namespace):
abort(503, message='Cannot contact Stripe')
if not cus.subscription:
+ return None
+
+ return get_plan(cus.subscription.plan.id)
+
+
+def lookup_allowed_private_repos(namespace):
+ """ Returns false if the given namespace has used its allotment of private repositories. """
+ current_plan = get_namespace_plan(namespace)
+ if current_plan is None:
return False
# Find the number of private repositories used by the namespace and compare it to the
# plan subscribed.
private_repos = model.user.get_private_repo_count(namespace)
- current_plan = get_plan(cus.subscription.plan.id)
- if current_plan is None:
- return False
return private_repos < current_plan['privateRepos']
diff --git a/endpoints/api/repository.py b/endpoints/api/repository.py
index b9664864e..851bd644d 100644
--- a/endpoints/api/repository.py
+++ b/endpoints/api/repository.py
@@ -17,7 +17,8 @@ from endpoints.api import (truthy_bool, format_date, nickname, log_action, valid
RepositoryParamResource, resource, query_param, parse_args, ApiResource,
request_error, require_scope, Unauthorized, NotFound, InvalidRequest,
path_param, ExceedsLicenseException)
-from endpoints.api.billing import lookup_allowed_private_repos
+from endpoints.api.billing import lookup_allowed_private_repos, get_namespace_plan
+from endpoints.common import check_repository_usage
from auth.permissions import (ModifyRepositoryPermission, AdministerRepositoryPermission,
CreateRepositoryPermission)
@@ -329,6 +330,9 @@ class Repository(RepositoryParamResource):
def delete(self, namespace, repository):
""" Delete a repository. """
model.repository.purge_repository(namespace, repository)
+ user = model.user.get_namespace_user(namespace)
+ plan = get_namespace_plan(namespace)
+ check_repository_usage(user, plan)
log_action('delete_repo', namespace,
{'repo': repository, 'namespace': namespace})
return 'Deleted', 204
diff --git a/endpoints/common.py b/endpoints/common.py
index 7469c58be..6e3abb4ac 100644
--- a/endpoints/common.py
+++ b/endpoints/common.py
@@ -200,10 +200,13 @@ def render_page_template(name, **kwargs):
def check_repository_usage(user_or_org, plan_found):
private_repos = model.user.get_private_repo_count(user_or_org.username)
- repos_allowed = plan_found['privateRepos']
+ if plan_found is None:
+ repos_allowed = 0
+ else:
+ repos_allowed = plan_found['privateRepos']
if private_repos > repos_allowed:
- model.notification.create_notification('over_private_usage', user_or_org,
+ model.notification.create_unique_notification('over_private_usage', user_or_org,
{'namespace': user_or_org.username})
else:
model.notification.delete_notifications_by_kind(user_or_org, 'over_private_usage')
From c3c13de63bc8a3f29f1bfd3f1891e68954d714cc Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Wed, 4 Nov 2015 16:20:45 -0500
Subject: [PATCH 03/25] Remove the used_legacy_github column
---
data/database.py | 3 --
...73669db7e12_remove_legacy_github_column.py | 25 ++++++++++++++++
util/migrate/migrategithubdeploykeys.py | 29 ++++++++++++++++++-
3 files changed, 53 insertions(+), 4 deletions(-)
create mode 100644 data/migrations/versions/73669db7e12_remove_legacy_github_column.py
diff --git a/data/database.py b/data/database.py
index cc0beafa6..d540ec013 100644
--- a/data/database.py
+++ b/data/database.py
@@ -472,9 +472,6 @@ class RepositoryBuildTrigger(BaseModel):
pull_robot = QuayUserField(allows_robots=True, null=True, related_name='triggerpullrobot',
robot_null_delete=True)
- # TODO(jschorr): Remove this column once we verify the backfill has succeeded.
- used_legacy_github = BooleanField(null=True, default=False)
-
class EmailConfirmation(BaseModel):
code = CharField(default=random_string_generator(), unique=True, index=True)
diff --git a/data/migrations/versions/73669db7e12_remove_legacy_github_column.py b/data/migrations/versions/73669db7e12_remove_legacy_github_column.py
new file mode 100644
index 000000000..38698c5eb
--- /dev/null
+++ b/data/migrations/versions/73669db7e12_remove_legacy_github_column.py
@@ -0,0 +1,25 @@
+"""Remove legacy github column
+
+Revision ID: 73669db7e12
+Revises: 35f538da62
+Create Date: 2015-11-04 16:18:18.107314
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '73669db7e12'
+down_revision = '35f538da62'
+
+from alembic import op
+import sqlalchemy as sa
+
+def upgrade(tables):
+ ### commands auto generated by Alembic - please adjust! ###
+ op.drop_column('repositorybuildtrigger', 'used_legacy_github')
+ ### end Alembic commands ###
+
+
+def downgrade(tables):
+ ### commands auto generated by Alembic - please adjust! ###
+ op.add_column('repositorybuildtrigger', sa.Column('used_legacy_github', sa.Boolean(), nullable=True))
+ ### end Alembic commands ###
diff --git a/util/migrate/migrategithubdeploykeys.py b/util/migrate/migrategithubdeploykeys.py
index 5e2f4089e..b2c1903a1 100644
--- a/util/migrate/migrategithubdeploykeys.py
+++ b/util/migrate/migrategithubdeploykeys.py
@@ -2,7 +2,8 @@ import logging
import logging.config
import json
-from data.database import RepositoryBuildTrigger, BuildTriggerService, db, db_for_update
+from data.database import (db, db_for_update, BaseModel, CharField, ForeignKeyField,
+ TextField, BooleanField)
from app import app
from buildtrigger.basehandler import BuildTriggerHandler
from util.security.ssh import generate_ssh_keypair
@@ -10,6 +11,32 @@ from github import GithubException
logger = logging.getLogger(__name__)
+class BuildTriggerService(BaseModel):
+ name = CharField(index=True, unique=True)
+
+class Repository(BaseModel):
+ pass
+
+class User(BaseModel):
+ pass
+
+class AccessToken(BaseModel):
+ pass
+
+class RepositoryBuildTrigger(BaseModel):
+ uuid = CharField()
+ service = ForeignKeyField(BuildTriggerService, index=True)
+ repository = ForeignKeyField(Repository, index=True)
+ connected_user = ForeignKeyField(User)
+ auth_token = CharField(null=True)
+ private_key = TextField(null=True)
+ config = TextField(default='{}')
+ write_token = ForeignKeyField(AccessToken, null=True)
+ pull_robot = ForeignKeyField(User, null=True, related_name='triggerpullrobot')
+
+ used_legacy_github = BooleanField(null=True, default=False)
+
+
def backfill_github_deploykeys():
""" Generates and saves private deploy keys for any GitHub build triggers still relying on
the old buildpack behavior. """
From a7fef8377c01594a11870d4187cb6221f7f1673d Mon Sep 17 00:00:00 2001
From: Silas Sewell
Date: Fri, 6 Nov 2015 13:34:49 -0500
Subject: [PATCH 04/25] Enable storage preference
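Reading the preference from the environment keeps the base config file untouched. A minimal sketch of the intended usage (the variable name comes from the diff; the example value is hypothetical):

    import os

    # Hypothetical example: prefer 's3_us_east' and fall back to 'local'.
    os.environ['QUAY_DISTRIBUTED_STORAGE_PREFERENCE'] = 's3_us_east local'

    config = {}
    preference = os.environ.get('QUAY_DISTRIBUTED_STORAGE_PREFERENCE', '').split()
    if preference:
        config['DISTRIBUTED_STORAGE_PREFERENCE'] = preference

    print(config)  # {'DISTRIBUTED_STORAGE_PREFERENCE': ['s3_us_east', 'local']}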
---
app.py | 7 +++++--
data/model/image.py | 2 +-
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/app.py b/app.py
index 0fcda573c..52f18b6af 100644
--- a/app.py
+++ b/app.py
@@ -74,11 +74,14 @@ else:
config_provider.update_app_config(app.config)
# Update any configuration found in the override environment variable.
-OVERRIDE_CONFIG_KEY = 'QUAY_OVERRIDE_CONFIG'
-
environ_config = json.loads(os.environ.get(OVERRIDE_CONFIG_KEY, '{}'))
app.config.update(environ_config)
+# Allow user to define a custom storage preference for the local instance.
+_distributed_storage_preference = os.environ.get('QUAY_DISTRIBUTED_STORAGE_PREFERENCE', '').split()
+if _distributed_storage_preference:
+ app.config['DISTRIBUTED_STORAGE_PREFERENCE'] = _distributed_storage_preference
+
class RequestWithId(Request):
request_gen = staticmethod(urn_generator(['request']))
diff --git a/data/model/image.py b/data/model/image.py
index 078875417..cdd81cf54 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -46,7 +46,7 @@ def get_repo_image(namespace_name, repository_name, docker_image_id):
def get_repo_image_extended(namespace_name, repository_name, docker_image_id):
def limit_to_image_id(query):
- return query.where(Image.docker_image_id == docker_image_id).limit(1)
+ return query.where(Image.docker_image_id == docker_image_id)
images = get_repository_images_base(namespace_name, repository_name, limit_to_image_id)
if not images:
From 2325328bbd1cca132e9173e8fcb0bdc792e9fa5d Mon Sep 17 00:00:00 2001
From: Matt Jibson
Date: Fri, 6 Nov 2015 15:34:52 -0500
Subject: [PATCH 05/25] Update mjibson ssh key
---
buildman/templates/cloudconfig.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/buildman/templates/cloudconfig.yaml b/buildman/templates/cloudconfig.yaml
index af83464ce..75454fd43 100644
--- a/buildman/templates/cloudconfig.yaml
+++ b/buildman/templates/cloudconfig.yaml
@@ -6,7 +6,7 @@ ssh_authorized_keys:
- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDNvw8qo9m8np7yQ/Smv/oklM8bo8VyNRZriGYBDuolWDL/mZpYCQnZJXphQo7RFdNABYistikjJlBuuwUohLf2uSq0iKoFa2TgwI43wViWzvuzU4nA02/ITD5BZdmWAFNyIoqeB50Ol4qUgDwLAZ+7Kv7uCi6chcgr9gTi99jY3GHyZjrMiXMHGVGi+FExFuzhVC2drKjbz5q6oRfQeLtNfG4psl5GU3MQU6FkX4fgoCx0r9R48/b7l4+TT7pWblJQiRfeldixu6308vyoTUEHasdkU3/X0OTaGz/h5XqTKnGQc6stvvoED3w+L3QFp0H5Z8sZ9stSsitmCBrmbcKZ jakemoshenko
- ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAgEAo/JkbGO6R7g1ZxARi0xWVM7FOfN02snRAcIO6vT9M7xMUkWVLgD+hM/o91lk+UFiYdql0CATobpFWncRL36KaUqsbw9/1BlI40wg296XHXSSnxhxZ4L7ytf6G1tyN319HXlI2kh9vAf/fy++yDvkH8dI3k1oLoW+mZPET6Pff04/6AXXrRlS5mhmGv9irGwiDHtVKpj6lU8DN/UtOrv1tiQ0pgwEJq05fLGoQfgPNaBCnW2z4Ubpn2gyMcMBMpSwo4hCqJePd349e4bLmFcT+gXYg7Mnup1DoTDlowFFN56wpxQbdp96IxWzU+jYPaIAuRo+BJzCyOS8qBv0Z4RZrgop0qp2JYiVwmViO6TZhIDz6loQJXUOIleQmNgTbiZx8Bwv5GY2jMYoVwlBp7yy5bRjxfbFsJ0vU7TVzNAG7oEJy/74HmHmWzRQlSlQjesr8gRbm9zgR8wqc/L107UOWFg7Cgh8ZNjKuADbXqYuda1Y9m2upcfS26UPz5l5PW5uFRMHZSi8pb1XV6/0Z8H8vwsh37Ur6aLi/5jruRmKhdlsNrB1IiDicBsPW3yg7HHSIdPU4oBNPC77yDCT3l4CKr4el81RrZt7FbJPfY+Ig9Q5O+05f6I8+ZOlJGyZ/Qfyl2aVm1HnlJKuBqPxeic8tMng/9B5N7uZL6Y3k5jFU8c= quentin
- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDI7LtxLItapmUbt3Gs+4Oxa1i22fkx1+aJDkAjiRWPSX3+cxOzuPfHX9uFzr+qj5hy4J7ErrPp8q9alu+il9lE26GQuUxOZiaUrXu4dRCXXdCqTHARWBxGUXjkxdMp2HIzFpBxmVqcRubrgM36LBzKapdDOqQdz7XnNm5Jmf0tH/N0+TgV60P0WVY1CxmTya+JHNFVgazhd+oIGEhTyW/eszMGcFUgZet7DQFytYIQXYSwwGpGdJ+0InKAJ2SzCt/yuUlSrhrVM8vSGeami1XYmgQiyth1zjteMd8uTrc9NREH7bZTNcMFBqVYE3BYQWGRrv8pMMgP9gxgLbxtVsUl barakmich-titania
-- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDUWB4aSjSRHCz5/6H9/EJhJVvRmPThvEzyHinaWPsuM9prBSLci9NF9WneVl30nczkvllA+w34kycdrS3fKpjTbODaEOLHBobWl3bccY0I6kr86q5z67NZffjCm/P/RL+dBaOiBWS8PV8oiDF1P6YdMo8Jk46n9fozmLCXHUuCw5BJ8PGjQqbsEzA3qFMeKZYdJHOizOfeIfKfCWYrrumVRY9v6SAUDoFOl4PZEM7QdGp9EoRYb9MNLgKLnZ4RjbcLoFwiqxY4KEM4zfjZPNOECiLCuJqvHM2QawwuO1klJ16HpJk+FzOTWQoZtT47LoE/XNSOcNtAOiD+OQ449ia1EArhm7+1DnLXvHXKIl1JtuqJz+wFCsbNSdB7P562OHAGRIxYK3DfE+0CZH1BeHYl7xiRBeCtZ+OZMIocqeJtq8taIS7Un5wnGcQWxFtQnr/f65EgbIi7G2dxPcjhr6K+GWYezsiReVVKnIClq2MHhABG9QOncKDIa47L3nyx3pm4ZfMbC2jmnK2pFgGGSfYDy4487JnAUOG1mzZ9vm4gDhatT+vZFSBOwv1e4CErBh/wYXooF5I0nGmE6y6zkKFqP+ZolJ6iXmXQ7Ea2oaGeyaprweBjkhHgghi4KbwKbClope4Zo9X9JJYBLQSW33sEEuy8MlSBpdZAbz9t/FvJaw== mjibson
+- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCxfToKI6APRz7uJF7cmb45k3rVE2FoDHI+NgDZ5nZP/QeuP5VjPwGjRXIaAIWJjF1tH3xJ02MIMb0gzWECdPOeU7Ft26xa7VybFDmog81zxq/h64AINhjZuT0qilr1Y8ojv+m1HOdPa3LNSRTGUZNVpotV8LVcyjKGax1uWlyQUmlrNAe1iNIEghGJiihj2ctsuNLfUQ5IosvOjzNpbbOj6fI2ByVAPZVL6eBvMWggTgQxKaZKAzhAoZW/IMUCuvQB0MKLdXDBkcMZx/09pb1R20RAKRZOOlVgFvyFbgjmTtjGOjmonxNzmKRcxOvmQQtrespDghACin2mOxsdvQ8wbeOKMnay2HXSK6FITHOaoJhSBjFz2elqahd49VRdKsWHKdvAs+UeaC5lTKMJULckM2Ra6HJqI7xzyAQvCcJqjtslrzhPCA7mpW8YPygZFKe4oqcqp5yYgm7XjsUOORDmqRN+K3gttaw/rPLMhHKbInLHj0VzGk1tyNTXbsWqZL4rLmjd/oNU3XuoMYSalNKkp1y967mGAYOlT2JdrXAusuzpr5QJVTDEa7GMY7gD4WP8EOqfg7U/9xCJosPpGilA5pp6bge/I3RURsYQT/CGg0nWjjZ5nKlYh8u4oHgBfM3RGIw5bBQuer5e2L3RNVlka+MdEOVQGA4UEKl6E+Vs5Q== matt.jibson@gmail.com
- ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDiNawWSZL2MF99zwG9cFjGmML6agsKwaacQEoTsjcjHGixyUnqHXaLdrGma5i/uphZPkI5XRBKiuIROACY/aRoIxJUpV7AQ1Zx87cILx6fDVePvU5lW2DdhlCDUdwjuzDb/WO/c/qMWjOPqRG4q8XvB7nhuORMMgdpDXWVH4LXPmFez1iIBCKNk04l6Se7wiEOQjaBnTDiBDYlWD78r6RdiAU5eIxpq+lKBDTcET0vegwcA/WE4YOlYBbOrgtHrgwWqG/pXxUu77aapDOmfjtDrgim6XP5kEnytg5gCaN9iLvIpT8b1wD/1Z+LoNSZg6m9gkcC2yTRI0apOBa2G8lz silas@pro.local
write_files:
From afa119d82e0d8ed512567f76bc2c35a8dbc3b114 Mon Sep 17 00:00:00 2001
From: Matt Jibson
Date: Fri, 6 Nov 2015 18:48:47 -0500
Subject: [PATCH 06/25] Set timeout for redis commands
fixes #779
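A minimal sketch of the effect, assuming a reachable Redis host: with both timeouts set, a hung server surfaces as an exception after roughly five seconds instead of blocking the build logs endpoint indefinitely.

    import redis

    client = redis.StrictRedis(host='localhost', port=6379,
                               socket_connect_timeout=5, socket_timeout=5)
    try:
        client.ping()
    except redis.TimeoutError:
        print('Redis did not answer within the 5 second timeout')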
---
data/buildlogs.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/data/buildlogs.py b/data/buildlogs.py
index ac58bd368..07127b65f 100644
--- a/data/buildlogs.py
+++ b/data/buildlogs.py
@@ -19,7 +19,7 @@ class RedisBuildLogs(object):
def __init__(self, redis_config):
args = dict(redis_config)
- args.update({'socket_connect_timeout': 5})
+ args.update({'socket_connect_timeout': 5, 'socket_timeout': 5})
self._redis_config = redis_config
self._redis = redis.StrictRedis(**args)
From 8463514a09a934430728da2a295368047ac53b76 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Sun, 8 Nov 2015 13:10:01 -0500
Subject: [PATCH 07/25] Fix delete_user call to remove all user data
---
data/model/user.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/data/model/user.py b/data/model/user.py
index 1079f8e0d..d05ea1693 100644
--- a/data/model/user.py
+++ b/data/model/user.py
@@ -651,9 +651,12 @@ def detach_external_login(user, service_name):
def delete_user(user):
- user.delete_instance(recursive=True, delete_nullable=True)
+ # Delete any repositories under the user's namespace.
+ for repo in list(Repository.select().where(Repository.namespace_user == user)):
+ repository.purge_repository(user.username, repo.name)
- # TODO: also delete any repository data associated
+ # Delete the user itself.
+ user.delete_instance(recursive=True, delete_nullable=True)
def get_pull_credentials(robotname):
From ade664036caa22d6409cdfbf7d720a68bde43fd0 Mon Sep 17 00:00:00 2001
From: Quentin Machu
Date: Sun, 8 Nov 2015 15:22:15 -0500
Subject: [PATCH 08/25] Fix migration 57dad559ff2d's header
Fixes #825
---
.../57dad559ff2d_add_support_for_quay_s_security_indexer.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py b/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py
index ed317f327..9e4d0e6c2 100644
--- a/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py
+++ b/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py
@@ -1,7 +1,9 @@
"""add support for quay's security indexer
+
Revision ID: 57dad559ff2d
Revises: 154f2befdfbe
Create Date: 2015-07-13 16:51:41.669249
+
"""
# revision identifiers, used by Alembic.
From b9fce4f5d0b6845caf62cbb85d751a4446493fca Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Mon, 9 Nov 2015 11:12:23 -0500
Subject: [PATCH 09/25] Add debug logs to migrations
---
alembic.ini | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/alembic.ini b/alembic.ini
index 5089cce9c..91fb72bc0 100644
--- a/alembic.ini
+++ b/alembic.ini
@@ -24,7 +24,7 @@ sqlalchemy.url = sqlite:///will/be/overridden
# Logging configuration
[loggers]
-keys = root,sqlalchemy,alembic
+keys = root,sqlalchemy,alembic,peewee,boto
[handlers]
keys = console
@@ -33,10 +33,20 @@ keys = console
keys = generic
[logger_root]
-level = WARN
+level = DEBUG
handlers = console
qualname =
+[logger_peewee]
+level = WARN
+handlers =
+qualname = peewee
+
+[logger_boto]
+level = WARN
+handlers =
+qualname = boto
+
[logger_sqlalchemy]
level = WARN
handlers =
From daa74b5132e8c70501440824f4032e83ed3eb117 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Mon, 9 Nov 2015 14:41:53 -0500
Subject: [PATCH 10/25] Fix test issue with initdb
Non-SQLite databases don't accept sys.maxsize for Integer columns
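Concretely (a quick illustration, not part of the patch): sys.maxsize on a 64-bit build is 2**63 - 1, which overflows a 32-bit signed INTEGER column on MySQL, while SQLite stores it without complaint. A sentinel of -1 fits every backend.

    import sys

    print(sys.maxsize)  # 9223372036854775807 on 64-bit builds
    print(2 ** 31 - 1)  # 2147483647, the largest value a signed 32-bit INT holds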
---
initdb.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/initdb.py b/initdb.py
index 80c9fa952..ce51c1749 100644
--- a/initdb.py
+++ b/initdb.py
@@ -5,7 +5,6 @@ import random
import calendar
import os
-from sys import maxsize
from datetime import datetime, timedelta
from peewee import (SqliteDatabase, create_model_tables, drop_model_tables, savepoint_sqlite,
savepoint)
@@ -97,7 +96,7 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map):
store.put_content('local_us', path, checksum)
new_image.security_indexed = False
- new_image.security_indexed_engine = maxsize
+ new_image.security_indexed_engine = -1
new_image.save()
creation_time = REFERENCE_DATE + timedelta(weeks=image_num) + timedelta(days=model_num)
From 2d2662f53fc5ada9aa77e19c7bdefe453445de55 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Mon, 9 Nov 2015 14:42:05 -0500
Subject: [PATCH 11/25] Fix deleting repos and images under MySQL
MySQL does not defer foreign-key constraint checks to the end of a transaction, so deleting images currently fails. This removes the constraint and leaves parent_id as a plain integer column.
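A minimal peewee sketch of the resulting shape (it mirrors the diff, with unrelated columns omitted); the assignment at the end is the pattern set_image_metadata now uses:

    class Image(BaseModel):
        docker_image_id = CharField(index=True)
        repository = ForeignKeyField(Repository)
        # Plain indexed integer instead of a self-referencing foreign key, so
        # purging a parent row in the same transaction passes on MySQL/InnoDB.
        parent_id = IntegerField(index=True, null=True)

    # Callers assign the id directly instead of a model instance:
    fetched.parent_id = parent.id
    fetched.save()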
---
data/database.py | 2 +-
.../57dad559ff2d_add_support_for_quay_s_security_indexer.py | 3 +--
data/model/image.py | 2 +-
data/model/repository.py | 4 ++--
util/migrate/backfill_parent_id.py | 4 ++--
workers/securityworker.py | 4 ++--
6 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/data/database.py b/data/database.py
index c87ece328..67f5ba2ab 100644
--- a/data/database.py
+++ b/data/database.py
@@ -575,7 +575,7 @@ class Image(BaseModel):
security_indexed = BooleanField(default=False)
security_indexed_engine = IntegerField(default=-1)
- parent = ForeignKeyField('self', index=True, null=True, related_name='children')
+ parent_id = IntegerField(index=True, null=True)
class Meta:
database = db
diff --git a/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py b/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py
index 9e4d0e6c2..078137c61 100644
--- a/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py
+++ b/data/migrations/versions/57dad559ff2d_add_support_for_quay_s_security_indexer.py
@@ -19,14 +19,13 @@ def upgrade(tables):
op.add_column('image', sa.Column('security_indexed', sa.Boolean(), nullable=False, default=False, server_default=sa.sql.expression.false()))
op.add_column('image', sa.Column('security_indexed_engine', sa.Integer(), nullable=False, default=-1, server_default="-1"))
op.create_index('image_parent_id', 'image', ['parent_id'], unique=False)
- op.create_foreign_key(op.f('fk_image_parent_id_image'), 'image', 'image', ['parent_id'], ['id'])
### end Alembic commands ###
+
op.create_index('image_security_indexed_engine_security_indexed', 'image', ['security_indexed_engine', 'security_indexed'])
def downgrade(tables):
### commands auto generated by Alembic - please adjust! ###
op.drop_index('image_security_indexed_engine_security_indexed', 'image')
- op.drop_constraint(op.f('fk_image_parent_id_image'), 'image', type_='foreignkey')
op.drop_index('image_parent_id', table_name='image')
op.drop_column('image', 'security_indexed')
op.drop_column('image', 'security_indexed_engine')
diff --git a/data/model/image.py b/data/model/image.py
index 7b673ee2f..f668b8001 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -303,7 +303,7 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
if parent:
fetched.ancestors = '%s%s/' % (parent.ancestors, parent.id)
- fetched.parent = parent
+ fetched.parent_id = parent.id
fetched.save()
fetched.storage.save()
diff --git a/data/model/repository.py b/data/model/repository.py
index f78b92b8b..8379a1f6d 100644
--- a/data/model/repository.py
+++ b/data/model/repository.py
@@ -131,12 +131,12 @@ def garbage_collect_repo(repo):
# iterable of tuples containing [(k, v), (k, v), ...]
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples()
images_to_storages = dict(all_repo_images)
- to_remove = set(images_to_storages.keys()).difference(referenced_ancestors)
+ to_remove = list(set(images_to_storages.keys()).difference(referenced_ancestors))
if len(to_remove) > 0:
logger.info('Cleaning up unreferenced images: %s', to_remove)
storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove}
- Image.delete().where(Image.id << list(to_remove)).execute()
+ Image.delete().where(Image.id << to_remove).execute()
if len(to_remove) > 0:
logger.info('Garbage collecting storage for images: %s', to_remove)
diff --git a/util/migrate/backfill_parent_id.py b/util/migrate/backfill_parent_id.py
index 2a4e7b091..1d2cf4136 100644
--- a/util/migrate/backfill_parent_id.py
+++ b/util/migrate/backfill_parent_id.py
@@ -18,7 +18,7 @@ def backfill_parent_id():
return (Image
.select(Image.id, Image.ancestors)
.join(ImageStorage)
- .where(Image.parent >> None, Image.ancestors != '/',
+ .where(Image.parent_id >> None, Image.ancestors != '/',
ImageStorage.uploading == False))
for to_backfill in yield_random_entries(fetch_batch, 10000, 0.3):
@@ -27,7 +27,7 @@ def backfill_parent_id():
image = db_for_update(Image
.select()
.where(Image.id == to_backfill.id)).get()
- image.parent = to_backfill.ancestors.split('/')[-2]
+ image.parent_id = int(to_backfill.ancestors.split('/')[-2])
image.save()
except Image.DoesNotExist:
pass
diff --git a/workers/securityworker.py b/workers/securityworker.py
index 81402cd07..e6ccd369b 100644
--- a/workers/securityworker.py
+++ b/workers/securityworker.py
@@ -27,7 +27,7 @@ def _get_image_to_export(version):
candidates = (Image
.select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum)
.join(ImageStorage)
- .where(Image.security_indexed_engine < version, Image.parent >> None, ImageStorage.uploading == False, ImageStorage.checksum != '')
+ .where(Image.security_indexed_engine < version, Image.parent_id >> None, ImageStorage.uploading == False, ImageStorage.checksum != '')
.limit(BATCH_SIZE*10)
.alias('candidates'))
@@ -44,7 +44,7 @@ def _get_image_to_export(version):
# With analyzed parent
candidates = (Image
.select(Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum, Parent.docker_image_id.alias('parent_docker_image_id'), ParentImageStorage.uuid.alias('parent_storage_uuid'))
- .join(Parent, on=(Image.parent == Parent.id))
+ .join(Parent, on=(Image.parent_id == Parent.id))
.join(ParentImageStorage, on=(ParentImageStorage.id == Parent.storage))
.switch(Image)
.join(ImageStorage)
From b0ec7074d04e82fc0ab766c2837279dfb8fe2d5b Mon Sep 17 00:00:00 2001
From: Matt Jibson
Date: Mon, 9 Nov 2015 15:17:34 -0500
Subject: [PATCH 12/25] Pass down page size from the scope
Instead of duplicating it here and in repo-panel-tags.js.
---
static/directives/repo-view/repo-panel-tags.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/static/directives/repo-view/repo-panel-tags.html b/static/directives/repo-view/repo-panel-tags.html
index 420f95f60..e1d94db67 100644
--- a/static/directives/repo-view/repo-panel-tags.html
+++ b/static/directives/repo-view/repo-panel-tags.html
@@ -53,7 +53,7 @@
-
+
From 7a503d678ba23f638093c74994b1abb980a29f99 Mon Sep 17 00:00:00 2001
From: Matt Jibson
Date: Mon, 9 Nov 2015 16:21:43 -0500
Subject: [PATCH 13/25] Add page offset to track line tag index
fixes #755
---
static/js/directives/repo-view/repo-panel-tags.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/static/js/directives/repo-view/repo-panel-tags.js b/static/js/directives/repo-view/repo-panel-tags.js
index 95e365a56..e7618bb35 100644
--- a/static/js/directives/repo-view/repo-panel-tags.js
+++ b/static/js/directives/repo-view/repo-panel-tags.js
@@ -169,6 +169,7 @@ angular.module('quay').directive('repoPanelTags', function () {
$scope.trackLineClass = function(index, track_info) {
var startIndex = $.inArray(track_info.tags[0], $scope.tags);
var endIndex = $.inArray(track_info.tags[track_info.tags.length - 1], $scope.tags);
+ index += $scope.options.page * $scope.tagsPerPage;
if (index == startIndex) {
return 'start';
From 5000b1621cee4052c0bad68f981913c8cbb975e7 Mon Sep 17 00:00:00 2001
From: Silas Sewell
Date: Mon, 26 Oct 2015 16:06:05 -0700
Subject: [PATCH 14/25] superuser: add storage replication config
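A sketch of the config shape the updated setup tool writes; the key names come from the diff, while the engine names and parameters of the second entry are hypothetical examples:

    DISTRIBUTED_STORAGE_CONFIG = {
        'default': ['LocalStorage', {'storage_path': '/datastorage/registry'}],
        'us_east': ['S3Storage', {'s3_bucket': 'quay-storage-east'}],
    }
    DISTRIBUTED_STORAGE_PREFERENCE = ['us_east', 'default']
    DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS = ['us_east']

With FEATURE_STORAGE_REPLICATION enabled, the validator rejects LocalStorage engines, since a locally mounted directory cannot be replicated across machines.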
---
Dockerfile | 2 +-
boot.py | 20 +++
conf/init/zz_boot.sh | 3 +
conf/init/zz_release.sh | 8 -
data/model/image.py | 16 ++
endpoints/api/suconfig.py | 6 +-
release.py | 12 --
static/css/pages/superuser.css | 28 ++-
static/css/quay.css | 2 +-
.../directives/config/config-setup-tool.html | 130 +++++++++-----
static/js/core-config-setup.js | 167 ++++++++++++++++--
storage/distributedstorage.py | 7 +-
util/config/configutil.py | 8 +-
util/config/database.py | 9 +
util/config/validator.py | 45 +++--
15 files changed, 357 insertions(+), 106 deletions(-)
create mode 100644 boot.py
create mode 100755 conf/init/zz_boot.sh
delete mode 100755 conf/init/zz_release.sh
create mode 100644 util/config/database.py
diff --git a/Dockerfile b/Dockerfile
index 5a757aa21..737ae4934 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -43,7 +43,7 @@ ADD conf/init/doupdatelimits.sh /etc/my_init.d/
ADD conf/init/copy_syslog_config.sh /etc/my_init.d/
ADD conf/init/runmigration.sh /etc/my_init.d/
ADD conf/init/syslog-ng.conf /etc/syslog-ng/
-ADD conf/init/zz_release.sh /etc/my_init.d/
+ADD conf/init/zz_boot.sh /etc/my_init.d/
ADD conf/init/service/ /etc/service/
diff --git a/boot.py b/boot.py
new file mode 100644
index 000000000..f78ad6ab8
--- /dev/null
+++ b/boot.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+import release
+
+from app import app
+from data.model.release import set_region_release
+from util.config.database import sync_database_with_config
+
+
+def main():
+ if app.config.get('SETUP_COMPLETE', False):
+ sync_database_with_config(app.config)
+
+ # Record deploy
+ if release.REGION and release.GIT_HEAD:
+ set_region_release(release.SERVICE, release.REGION, release.GIT_HEAD)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/conf/init/zz_boot.sh b/conf/init/zz_boot.sh
new file mode 100755
index 000000000..ab760266b
--- /dev/null
+++ b/conf/init/zz_boot.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+/venv/bin/python /boot.py
diff --git a/conf/init/zz_release.sh b/conf/init/zz_release.sh
deleted file mode 100755
index 152494cff..000000000
--- a/conf/init/zz_release.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-set -e
-
-source venv/bin/activate
-
-export PYTHONPATH=.
-
-python /release.py
diff --git a/data/model/image.py b/data/model/image.py
index 078875417..f084f2662 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -356,3 +356,19 @@ def get_image(repo, dockerfile_id):
return Image.get(Image.docker_image_id == dockerfile_id, Image.repository == repo)
except Image.DoesNotExist:
return None
+
+
+def ensure_image_locations(*names):
+ with db_transaction():
+ locations = ImageStorageLocation.select().where(ImageStorageLocation.name << names)
+
+ insert_names = list(names)
+
+ for location in locations:
+ insert_names.remove(location.name)
+
+ if not insert_names:
+ return
+
+ data = [{'name': name} for name in insert_names]
+ ImageStorageLocation.insert_many(data).execute()
diff --git a/endpoints/api/suconfig.py b/endpoints/api/suconfig.py
index aaea5a309..5a5cd9346 100644
--- a/endpoints/api/suconfig.py
+++ b/endpoints/api/suconfig.py
@@ -16,6 +16,7 @@ from auth.permissions import SuperUserPermission
from auth.auth_context import get_authenticated_user
from data.database import User
from util.config.configutil import add_enterprise_config_defaults
+from util.config.database import sync_database_with_config
from util.config.validator import validate_service_for_config, CONFIG_FILENAMES
from data.runmigration import run_alembic_migration
from data.users import get_federated_service_name
@@ -216,6 +217,9 @@ class SuperUserConfig(ApiResource):
current_user = get_authenticated_user()
model.user.confirm_attached_federated_login(current_user, service_name)
+ # Ensure database is up-to-date with config
+ sync_database_with_config(config_object)
+
return {
'exists': True,
'config': config_object
@@ -373,4 +377,4 @@ class SuperUserConfigValidate(ApiResource):
config = request.get_json()['config']
return validate_service_for_config(service, config, request.get_json().get('password', ''))
- abort(403)
\ No newline at end of file
+ abort(403)
diff --git a/release.py b/release.py
index 91a46f796..a0439d9a8 100644
--- a/release.py
+++ b/release.py
@@ -12,15 +12,3 @@ REGION = os.environ.get('QUAY_REGION')
if os.path.isfile(_GIT_HEAD_PATH):
with open(_GIT_HEAD_PATH) as f:
GIT_HEAD = f.read().strip()
-
-
-def main():
- from app import app
- from data.model.release import set_region_release
-
- if REGION and GIT_HEAD:
- set_region_release(SERVICE, REGION, GIT_HEAD)
-
-
-if __name__ == '__main__':
- main()
diff --git a/static/css/pages/superuser.css b/static/css/pages/superuser.css
index 380e31879..d1eae17bd 100644
--- a/static/css/pages/superuser.css
+++ b/static/css/pages/superuser.css
@@ -22,4 +22,30 @@
.super-user .user-row.disabled .avatar {
-webkit-filter: grayscale(100%);
-}
\ No newline at end of file
+}
+
+.super-user td .co-alert {
+ margin: 16px 0 0 0;
+}
+
+.super-user .add-storage-link {
+ margin-top: 5px;
+}
+
+.super-user .storage-config {
+ border-bottom: 1px solid #eee;
+ padding: 0 0 10px 0;
+ margin: 10px 0 0 0;
+}
+
+.super-user .last {
+ border-bottom: none;
+}
+
+.super-user .feature-storage-replication {
+ margin: 15px 0 10px 0;
+}
+
+.super-user .input-util {
+ margin-top: 10px;
+}
diff --git a/static/css/quay.css b/static/css/quay.css
index bd8e3873a..143496034 100644
--- a/static/css/quay.css
+++ b/static/css/quay.css
@@ -1363,7 +1363,7 @@ i.toggle-icon:hover {
form input.ng-invalid.ng-dirty,
*[ng-form] input.ng-invalid.ng-dirty {
- background-color: #FDD7D9;
+ background-color: #FDD7D9 !important;
}
form input.ng-valid.ng-dirty,
diff --git a/static/directives/config/config-setup-tool.html b/static/directives/config/config-setup-tool.html
index 9edbffd75..a36d08416 100644
--- a/static/directives/config/config-setup-tool.html
+++ b/static/directives/config/config-setup-tool.html
@@ -194,54 +194,94 @@
A remote storage system is required for high-availability systems.
-
-
- Storage Engine: |
-
-
- |
-
+
+
+
+
+
If enabled, replicates storage to other regions.
+
-
-
- {{ field.title }}: |
-
-
-
-
-
-
-
-
- |
+
+
+
+
+ {{ field.title }}: |
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {{ field.help_text }}
+
+
+ See Documentation for more information
+
+ |
+
+
+
+
+
diff --git a/static/js/core-config-setup.js b/static/js/core-config-setup.js
index e3d5c3483..13b49b865 100644
--- a/static/js/core-config-setup.js
+++ b/static/js/core-config-setup.js
@@ -185,12 +185,17 @@ angular.module("core-config-setup", ['angularFileUpload'])
$scope.checkValidateAndSave = function() {
if ($scope.configform.$valid) {
+ saveStorageConfig();
$scope.validateAndSave();
return;
}
- $element.find("input.ng-invalid:first")[0].scrollIntoView();
- $element.find("input.ng-invalid:first").focus();
+ var query = $element.find("input.ng-invalid:first");
+
+ if (query && query.length) {
+ query[0].scrollIntoView();
+ query.focus();
+ }
};
$scope.validateAndSave = function() {
@@ -277,6 +282,99 @@ angular.module("core-config-setup", ['angularFileUpload'])
}, ApiService.errorDisplay('Could not save configuration. Please report this error.'));
};
+ // Convert storage config to an array
+ var initializeStorageConfig = function($scope) {
+ var config = $scope.config.DISTRIBUTED_STORAGE_CONFIG || {};
+ var defaultLocations = $scope.config.DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS || [];
+ var preference = $scope.config.DISTRIBUTED_STORAGE_PREFERENCE || [];
+
+ $scope.serverStorageConfig = angular.copy(config);
+ $scope.storageConfig = [];
+
+ Object.keys(config).forEach(function(location) {
+ $scope.storageConfig.push({
+ location: location,
+ defaultLocation: defaultLocations.indexOf(location) >= 0,
+ data: angular.copy(config[location]),
+ error: {},
+ });
+ });
+
+ if (!$scope.storageConfig.length) {
+ $scope.addStorageConfig('default');
+ return;
+ }
+
+ // match DISTRIBUTED_STORAGE_PREFERENCE order first, remaining are
+ // ordered by Unicode code point value
+ $scope.storageConfig.sort(function(a, b) {
+ var indexA = preference.indexOf(a.location);
+ var indexB = preference.indexOf(b.location);
+
+ if (indexA > -1 && indexB > -1) return indexA < indexB ? -1 : 1;
+ if (indexA > -1) return -1;
+ if (indexB > -1) return 1;
+
+ return a.location < b.location ? -1 : 1;
+ });
+ };
+
+ $scope.allowChangeLocationStorageConfig = function(location) {
+ if (!$scope.serverStorageConfig[location]) { return true; }
+
+ // allow user to change location ID if another exists with the same ID
+ return $scope.storageConfig.filter(function(sc) {
+ return sc.location === location;
+ }).length >= 2;
+ };
+
+ $scope.allowRemoveStorageConfig = function(location) {
+ return $scope.storageConfig.length > 1 && $scope.allowChangeLocationStorageConfig(location);
+ };
+
+ $scope.canAddStorageConfig = function() {
+ return $scope.config &&
+ $scope.config.FEATURE_STORAGE_REPLICATION &&
+ $scope.storageConfig &&
+ (!$scope.storageConfig.length || $scope.storageConfig.length < 10);
+ };
+
+ $scope.addStorageConfig = function(location) {
+ var storageType = 'LocalStorage';
+
+ // Use last storage type by default
+ if ($scope.storageConfig.length) {
+ storageType = $scope.storageConfig[$scope.storageConfig.length-1].data[0];
+ }
+
+ $scope.storageConfig.push({
+ location: location || '',
+ defaultLocation: false,
+ data: [storageType, {}],
+ error: {},
+ });
+ };
+
+ $scope.removeStorageConfig = function(sc) {
+ $scope.storageConfig.splice($scope.storageConfig.indexOf(sc), 1);
+ };
+
+ var saveStorageConfig = function() {
+ var config = {};
+ var defaultLocations = [];
+ var preference = [];
+
+ $scope.storageConfig.forEach(function(sc) {
+ config[sc.location] = sc.data;
+ if (sc.defaultLocation) defaultLocations.push(sc.location);
+ preference.push(sc.location);
+ });
+
+ $scope.config.DISTRIBUTED_STORAGE_CONFIG = config;
+ $scope.config.DISTRIBUTED_STORAGE_DEFAULT_LOCATIONS = defaultLocations;
+ $scope.config.DISTRIBUTED_STORAGE_PREFERENCE = preference;
+ };
+
var gitlabSelector = function(key) {
return function(value) {
if (!value || !$scope.config) { return; }
@@ -378,18 +476,11 @@ angular.module("core-config-setup", ['angularFileUpload'])
$scope.$watch('mapped.redis.port', redisSetter('port'));
$scope.$watch('mapped.redis.password', redisSetter('password'));
- // Add a watch to remove any fields not allowed by the current storage configuration.
- // We have to do this otherwise extra fields (which are not allowed) can end up in the
- // configuration.
- $scope.$watch('config.DISTRIBUTED_STORAGE_CONFIG.local[0]', function(value) {
- // Remove any fields not associated with the current kind.
- if (!value || !$scope.STORAGE_CONFIG_FIELDS[value]
- || !$scope.config.DISTRIBUTED_STORAGE_CONFIG
- || !$scope.config.DISTRIBUTED_STORAGE_CONFIG.local
- || !$scope.config.DISTRIBUTED_STORAGE_CONFIG.local[1]) { return; }
-
- var allowedFields = $scope.STORAGE_CONFIG_FIELDS[value];
- var configObject = $scope.config.DISTRIBUTED_STORAGE_CONFIG.local[1];
+ // Remove extra fields (which are not allowed) from storage config.
+ var updateFields = function(sc) {
+ var type = sc.data[0];
+ var configObject = sc.data[1];
+ var allowedFields = $scope.STORAGE_CONFIG_FIELDS[type];
// Remove any fields not allowed.
for (var fieldName in configObject) {
@@ -412,8 +503,53 @@ angular.module("core-config-setup", ['angularFileUpload'])
configObject[allowedFields[i].name] = configObject[allowedFields[i].name] || false;
}
}
+ };
+
+ // Validate and update storage config on update.
+ var refreshStorageConfig = function() {
+ if (!$scope.config || !$scope.storageConfig) return;
+
+ var locationCounts = {};
+ var errors = [];
+ var valid = true;
+
+ $scope.storageConfig.forEach(function(sc) {
+ // remove extra fields from storage config
+ updateFields(sc);
+
+ if (!locationCounts[sc.location]) locationCounts[sc.location] = 0;
+ locationCounts[sc.location]++;
+ });
+
+ // validate storage config
+ $scope.storageConfig.forEach(function(sc) {
+ var error = {};
+
+ if ($scope.config.FEATURE_STORAGE_REPLICATION && sc.data[0] === 'LocalStorage') {
+ error.engine = 'Replication to a locally mounted directory is unsupported as it is only accessible on a single machine.';
+ valid = false;
+ }
+
+ if (locationCounts[sc.location] > 1) {
+ error.location = 'Location ID must be unique.';
+ valid = false;
+ }
+
+ errors.push(error);
+ });
+
+ $scope.storageConfigError = errors;
+ $scope.configform.$setValidity('storageConfig', valid);
+ };
+
+ $scope.$watch('config.FEATURE_STORAGE_REPLICATION', function() {
+ refreshStorageConfig();
});
+ $scope.$watch('storageConfig', function() {
+ refreshStorageConfig();
+ }, true);
+
$scope.$watch('config', function(value) {
$scope.mapped['$hasChanges'] = true;
}, true);
@@ -424,6 +560,7 @@ angular.module("core-config-setup", ['angularFileUpload'])
ApiService.scGetConfig().then(function(resp) {
$scope.config = resp['config'] || {};
initializeMappedLogic($scope.config);
+ initializeStorageConfig($scope);
$scope.mapped['$hasChanges'] = false;
}, ApiService.errorDisplay('Could not load config'));
});
@@ -919,4 +1056,4 @@ angular.module("core-config-setup", ['angularFileUpload'])
}
};
return directiveDefinitionObject;
- });
\ No newline at end of file
+ });
diff --git a/storage/distributedstorage.py b/storage/distributedstorage.py
index 49f32c559..55afec293 100644
--- a/storage/distributedstorage.py
+++ b/storage/distributedstorage.py
@@ -16,6 +16,7 @@ def _location_aware(unbound_func):
for preferred in self.preferred_locations:
if preferred in locations:
storage = self._storages[preferred]
+ break
if not storage:
storage = self._storages[random.sample(locations, 1)[0]]
@@ -26,10 +27,10 @@ def _location_aware(unbound_func):
class DistributedStorage(StoragePaths):
- def __init__(self, storages, preferred_locations=[], default_locations=[]):
+ def __init__(self, storages, preferred_locations=None, default_locations=None):
self._storages = dict(storages)
- self.preferred_locations = list(preferred_locations)
- self.default_locations = list(default_locations)
+ self.preferred_locations = list(preferred_locations or [])
+ self.default_locations = list(default_locations or [])
@property
def locations(self):
diff --git a/util/config/configutil.py b/util/config/configutil.py
index 2a007a794..5b9bc1dcb 100644
--- a/util/config/configutil.py
+++ b/util/config/configutil.py
@@ -32,15 +32,15 @@ def add_enterprise_config_defaults(config_obj, current_secret_key, hostname):
# Default storage configuration.
if not 'DISTRIBUTED_STORAGE_CONFIG' in config_obj:
- config_obj['DISTRIBUTED_STORAGE_PREFERENCE'] = ['local']
+ config_obj['DISTRIBUTED_STORAGE_PREFERENCE'] = ['default']
config_obj['DISTRIBUTED_STORAGE_CONFIG'] = {
- 'local': ['LocalStorage', {'storage_path': '/datastorage/registry'}]
+ 'default': ['LocalStorage', {'storage_path': '/datastorage/registry'}]
}
- config_obj['USERFILES_LOCATION'] = 'local'
+ config_obj['USERFILES_LOCATION'] = 'default'
config_obj['USERFILES_PATH'] = 'userfiles/'
- config_obj['LOG_ARCHIVE_LOCATION'] = 'local'
+ config_obj['LOG_ARCHIVE_LOCATION'] = 'default'
if not 'SERVER_HOSTNAME' in config_obj:
config_obj['SERVER_HOSTNAME'] = hostname
diff --git a/util/config/database.py b/util/config/database.py
new file mode 100644
index 000000000..ea04dc5dc
--- /dev/null
+++ b/util/config/database.py
@@ -0,0 +1,9 @@
+from data import model
+
+
+def sync_database_with_config(config):
+ """ This ensures all implicitly required reference table entries exist in the database. """
+
+ location_names = config.get('DISTRIBUTED_STORAGE_CONFIG', {}).keys()
+ if location_names:
+ model.image.ensure_image_locations(*location_names)
diff --git a/util/config/validator.py b/util/config/validator.py
index d4dc1cd39..1e8846646 100644
--- a/util/config/validator.py
+++ b/util/config/validator.py
@@ -30,12 +30,18 @@ JWT_FILENAMES = ['jwt-authn.cert']
CONFIG_FILENAMES = SSL_FILENAMES + DB_SSL_FILENAMES + JWT_FILENAMES
-def get_storage_provider(config):
- parameters = config.get('DISTRIBUTED_STORAGE_CONFIG', {}).get('local', ['LocalStorage', {}])
+def get_storage_providers(config):
+ storage_config = config.get('DISTRIBUTED_STORAGE_CONFIG', {})
+
+ drivers = {}
+
try:
- return get_storage_driver(parameters)
+ for name, parameters in storage_config.items():
+ drivers[name] = (parameters[0], get_storage_driver(parameters))
except TypeError:
- raise Exception('Missing required storage configuration parameter(s)')
+ raise Exception('Missing required storage configuration parameter(s): %s' % name)
+
+ return drivers
def validate_service_for_config(service, config, password=None):
""" Attempts to validate the configuration for the given service. """
@@ -80,20 +86,29 @@ def _validate_redis(config, _):
def _validate_registry_storage(config, _):
""" Validates registry storage. """
- driver = get_storage_provider(config)
+ replication_enabled = config.get('FEATURE_STORAGE_REPLICATION', False)
- # Run custom validation on the driver.
- driver.validate(app.config['HTTPCLIENT'])
+ providers = get_storage_providers(config).items()
- # Put and remove a temporary file to make sure the normal storage paths work.
- driver.put_content('_verify', 'testing 123')
- driver.remove('_verify')
+ if not providers:
+ raise Exception('Storage configuration required')
- # Run setup on the driver if the read/write succeeded.
- try:
- driver.setup()
- except Exception as ex:
- raise Exception('Could not prepare storage: %s' % str(ex))
+ for name, (storage_type, driver) in providers:
+ try:
+ if replication_enabled and storage_type == 'LocalStorage':
+ raise Exception('Locally mounted directory not supported with storage replication')
+
+ # Run custom validation on the driver.
+ driver.validate(app.config['HTTPCLIENT'])
+
+ # Put and remove a temporary file to make sure the normal storage paths work.
+ driver.put_content('_verify', 'testing 123')
+ driver.remove('_verify')
+
+ # Run setup on the driver if the read/write succeeded.
+ driver.setup()
+ except Exception as ex:
+ raise Exception('Invalid storage configuration: %s: %s' % (name, str(ex)))
def _validate_mailing(config, _):
From dc24e8b1a1b7f55d1a5123879c6dcc1d2d326c37 Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Mon, 9 Nov 2015 20:51:38 -0500
Subject: [PATCH 15/25] Backfill by allocating and selecting ids in random
blocks
Fixes #826
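A minimal sketch of how a backfill loop consumes the allocator (it mirrors the updated backfill_content_checksums below; work_already_done and process are hypothetical placeholders):

    max_id = ImageStorage.select(fn.Max(ImageStorage.id)).scalar()

    for candidate, abort_early in yield_random_entries(batch_query, ImageStorage.id,
                                                       1000, max_id):
        if work_already_done(candidate):
            # Another worker finished this row already; setting the event lets the
            # allocator skip the rest of the current block.
            abort_early.set()
            continue
        process(candidate)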
---
requirements-nover.txt | 1 +
requirements.txt | 1 +
test/test_backfill_allocator.py | 132 +++++++++++++++++
util/migrate/__init__.py | 54 -------
util/migrate/allocator.py | 156 +++++++++++++++++++++
util/migrate/backfill_content_checksums.py | 11 +-
util/migrate/backfill_parent_id.py | 48 ++++++-
util/migrate/backfill_v1_checksums.py | 14 +-
8 files changed, 347 insertions(+), 70 deletions(-)
create mode 100644 test/test_backfill_allocator.py
create mode 100644 util/migrate/allocator.py
diff --git a/requirements-nover.txt b/requirements-nover.txt
index 0ebe5534d..e55df79f7 100644
--- a/requirements-nover.txt
+++ b/requirements-nover.txt
@@ -56,3 +56,4 @@ toposort
rfc3987
pyjwkest
jsonpath-rw
+bintrees
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index a6f4c28a3..1fe1e4248 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ APScheduler==3.0.3
autobahn==0.9.3-3
Babel==1.3
beautifulsoup4==4.4.0
+bintrees==2.0.2
blinker==1.3
boto==2.38.0
cachetools==1.0.3
diff --git a/test/test_backfill_allocator.py b/test/test_backfill_allocator.py
new file mode 100644
index 000000000..40000cdb5
--- /dev/null
+++ b/test/test_backfill_allocator.py
@@ -0,0 +1,132 @@
+import unittest
+import logging
+import random
+
+from datetime import datetime, timedelta
+from util.migrate.allocator import CompletedKeys, NoAvailableKeysError
+
+
+class CompletedTestCase(unittest.TestCase):
+ def test_merge_blocks_operations(self):
+ candidates = CompletedKeys(10)
+ candidates.mark_completed(1, 5)
+
+ self.assertTrue(candidates.is_available(5))
+ self.assertTrue(candidates.is_available(0))
+ self.assertFalse(candidates.is_available(1))
+ self.assertFalse(candidates.is_available(4))
+ self.assertFalse(candidates.is_available(11))
+ self.assertFalse(candidates.is_available(10))
+ self.assertEqual(1, len(candidates._slabs))
+
+ candidates.mark_completed(5, 6)
+ self.assertFalse(candidates.is_available(5))
+ self.assertTrue(candidates.is_available(6))
+ self.assertEqual(1, len(candidates._slabs))
+
+ candidates.mark_completed(3, 8)
+ self.assertTrue(candidates.is_available(9))
+ self.assertTrue(candidates.is_available(8))
+ self.assertFalse(candidates.is_available(7))
+ self.assertEqual(1, len(candidates._slabs))
+
+ def test_adjust_max(self):
+ candidates = CompletedKeys(10)
+ self.assertEqual(0, len(candidates._slabs))
+
+ self.assertTrue(candidates.is_available(9))
+ candidates.mark_completed(5, 12)
+ self.assertEqual(0, len(candidates._slabs))
+
+ self.assertFalse(candidates.is_available(9))
+ self.assertTrue(candidates.is_available(4))
+
+ def test_adjust_min(self):
+ candidates = CompletedKeys(10)
+ self.assertEqual(0, len(candidates._slabs))
+
+ self.assertTrue(candidates.is_available(2))
+ candidates.mark_completed(0, 3)
+ self.assertEqual(0, len(candidates._slabs))
+
+ self.assertFalse(candidates.is_available(2))
+ self.assertTrue(candidates.is_available(4))
+
+ def test_inside_block(self):
+ candidates = CompletedKeys(10)
+ candidates.mark_completed(1, 8)
+ self.assertEqual(1, len(candidates._slabs))
+
+ candidates.mark_completed(2, 5)
+ self.assertEqual(1, len(candidates._slabs))
+ self.assertFalse(candidates.is_available(1))
+ self.assertFalse(candidates.is_available(5))
+
+ def test_wrap_block(self):
+ candidates = CompletedKeys(10)
+ candidates.mark_completed(2, 5)
+ self.assertEqual(1, len(candidates._slabs))
+
+ candidates.mark_completed(1, 8)
+ self.assertEqual(1, len(candidates._slabs))
+ self.assertFalse(candidates.is_available(1))
+ self.assertFalse(candidates.is_available(5))
+
+ def test_non_contiguous(self):
+ candidates = CompletedKeys(10)
+
+ candidates.mark_completed(1, 5)
+ self.assertEqual(1, len(candidates._slabs))
+ self.assertTrue(candidates.is_available(5))
+ self.assertTrue(candidates.is_available(6))
+
+ candidates.mark_completed(6, 8)
+ self.assertEqual(2, len(candidates._slabs))
+ self.assertTrue(candidates.is_available(5))
+ self.assertFalse(candidates.is_available(6))
+
+ def test_big_merge(self):
+ candidates = CompletedKeys(10)
+
+ candidates.mark_completed(1, 5)
+ self.assertEqual(1, len(candidates._slabs))
+
+ candidates.mark_completed(6, 8)
+ self.assertEqual(2, len(candidates._slabs))
+
+ candidates.mark_completed(5, 6)
+ self.assertEqual(1, len(candidates._slabs))
+
+ def test_range_limits(self):
+ candidates = CompletedKeys(10)
+ self.assertFalse(candidates.is_available(-1))
+ self.assertFalse(candidates.is_available(10))
+
+ self.assertTrue(candidates.is_available(9))
+ self.assertTrue(candidates.is_available(0))
+
+ def test_random_saturation(self):
+ candidates = CompletedKeys(100)
+ with self.assertRaises(NoAvailableKeysError):
+ for _ in range(101):
+ start = candidates.get_block_start_index(10)
+ self.assertTrue(candidates.is_available(start))
+ candidates.mark_completed(start, start + 10)
+
+ def test_huge_dataset(self):
+ candidates = CompletedKeys(1024 * 1024)
+ start_time = datetime.now()
+ iterations = 0
+ with self.assertRaises(NoAvailableKeysError):
+ while (datetime.now() - start_time) < timedelta(seconds=10):
+ start = candidates.get_block_start_index(1024)
+ self.assertTrue(candidates.is_available(start))
+ candidates.mark_completed(start, start + random.randint(512, 1024))
+ iterations += 1
+
+ self.assertGreater(iterations, 1024)
+
+
+if __name__ == '__main__':
+ logging.basicConfig(level=logging.DEBUG)
+ unittest.main()
diff --git a/util/migrate/__init__.py b/util/migrate/__init__.py
index e1770742d..809bcaef8 100644
--- a/util/migrate/__init__.py
+++ b/util/migrate/__init__.py
@@ -2,7 +2,6 @@ import logging
from sqlalchemy.types import TypeDecorator, Text
from sqlalchemy.dialects.mysql import TEXT as MySQLText, LONGTEXT
-from random import shuffle
logger = logging.getLogger(__name__)
@@ -21,56 +20,3 @@ class UTF8LongText(TypeDecorator):
return dialect.type_descriptor(LONGTEXT(charset='utf8mb4', collation='utf8mb4_unicode_ci'))
else:
return dialect.type_descriptor(Text())
-
-
-def _chance_duplication(pop_size, samples):
- """ The chance of randomly selecting a duplicate when you choose the specified number of samples
- from the specified population size.
- """
- pairs = (samples * (samples - 1)) / 2.0
- unique = (pop_size - 1.0)/pop_size
- all_unique = pow(unique, pairs)
- return 1 - all_unique
-
-
-def _num_checks(pop_size, desired):
- """ Binary search for the proper number of entries to use to get the specified collision
- probability.
- """
- s_max = pop_size
- s_min = 0
- last_test = -1
- s_test = s_max
-
- while s_max > s_min and last_test != s_test:
- last_test = s_test
- s_test = (s_max + s_min)/2
- chance = _chance_duplication(pop_size, s_test)
- if chance > desired:
- s_max = s_test - 1
- else:
- s_min = s_test
-
- return s_test
-
-
-def yield_random_entries(batch_query, batch_size, collision_chance):
- """ This method will yield semi-random items from a query in a database friendly way until no
- more items match the base query modifier. It will pull batches of batch_size from the query
- and yield enough items from each batch so that concurrent workers have a reduced chance of
- selecting the same items. For example, if your batches return 10,000 entries, and you desire
- only a .03 collision_chance, we will only use 25 random entries before going back to the db
- for a new batch.
- """
-
- # Seed with some data which will pass the condition, but will be immediately discarded
- all_candidates = [1]
- while len(all_candidates) > 0:
- all_candidates = list(batch_query().limit(batch_size))
- shuffle(all_candidates)
- num_selections = max(1, _num_checks(len(all_candidates), collision_chance))
- logger.debug('Found %s/%s matching entries, processing %s', len(all_candidates), batch_size,
- num_selections)
- candidates = all_candidates[0:num_selections]
- for candidate in candidates:
- yield candidate
diff --git a/util/migrate/allocator.py b/util/migrate/allocator.py
new file mode 100644
index 000000000..999416ace
--- /dev/null
+++ b/util/migrate/allocator.py
@@ -0,0 +1,156 @@
+import logging
+import random
+
+from bintrees import RBTree
+from threading import Event
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class NoAvailableKeysError(ValueError):
+ pass
+
+
+class CompletedKeys(object):
+ def __init__(self, max_index):
+ self._max_index = max_index
+ self._min_index = 0
+ self._slabs = RBTree()
+
+ def _get_previous_or_none(self, index):
+ try:
+ return self._slabs.floor_item(index)
+ except KeyError:
+ return None
+
+ def is_available(self, index):
+ logger.debug('Testing index %s', index)
+ if index >= self._max_index or index < self._min_index:
+ logger.debug('Index out of range')
+ return False
+
+ try:
+ prev_start, prev_length = self._slabs.floor_item(index)
+ logger.debug('Prev range: %s-%s', prev_start, prev_start + prev_length)
+ return (prev_start + prev_length) <= index
+ except KeyError:
+ return True
+
+ def mark_completed(self, start_index, past_last_index):
+ logger.debug('Marking the range completed: %s-%s', start_index, past_last_index)
+ # Find the item directly before this and see if there is overlap
+ to_discard = set()
+ try:
+ prev_start, prev_length = self._slabs.floor_item(start_index)
+ if prev_start + prev_length >= start_index:
+ # we are going to merge with the range before us
+ logger.debug('Merging with the prev range: %s-%s', prev_start, prev_start + prev_length)
+ to_discard.add(prev_start)
+ start_index = prev_start
+ past_last_index = max(past_last_index, prev_start + prev_length)
+ except KeyError:
+ pass
+
+ # Find all keys between the start and last index and merge them into one block
+ for merge_start, merge_length in self._slabs.iter_items(start_index, past_last_index + 1):
+ candidate_next_index = merge_start + merge_length
+ logger.debug('Merging with block %s-%s', merge_start, candidate_next_index)
+ to_discard.add(merge_start)
+ past_last_index = max(past_last_index, candidate_next_index)
+
+ # write the new block which is fully merged
+ discard = False
+ if past_last_index >= self._max_index:
+ logger.debug('Discarding block and setting new max to: %s', start_index)
+ self._max_index = start_index
+ discard = True
+
+ if start_index <= self._min_index:
+ logger.debug('Discarding block and setting new min to: %s', past_last_index)
+ self._min_index = past_last_index
+ discard = True
+
+ if to_discard:
+ logger.debug('Discarding %s obsolete blocks', len(to_discard))
+ self._slabs.remove_items(to_discard)
+
+ if not discard:
+ logger.debug('Writing new block with range: %s-%s', start_index, past_last_index)
+ self._slabs.insert(start_index, past_last_index - start_index)
+
+ logger.debug('Total blocks: %s', len(self._slabs))
+
+ def get_block_start_index(self, block_size_estimate):
+ logger.debug('Total range: %s-%s', self._min_index, self._max_index)
+ if self._max_index <= self._min_index:
+ raise NoAvailableKeysError('All indexes have been marked completed')
+
+ num_holes = len(self._slabs) + 1
+ random_hole = random.randint(0, num_holes - 1)
+ logger.debug('Selected random hole %s with %s total holes', random_hole, num_holes)
+
+ hole_start = self._min_index
+ past_hole_end = self._max_index
+
+ # Now that we have picked a hole, we need to define the bounds
+ if random_hole > 0:
+ # There will be a slab before this hole, find where it ends
+ bound_entries = self._slabs.nsmallest(random_hole + 1)[-2:]
+ left_index, left_len = bound_entries[0]
+ logger.debug('Left range %s-%s', left_index, left_index + left_len)
+ hole_start = left_index + left_len
+
+ if len(bound_entries) > 1:
+ right_index, right_len = bound_entries[1]
+ logger.debug('Right range %s-%s', right_index, right_index + right_len)
+ past_hole_end, _ = bound_entries[1]
+ elif not self._slabs.is_empty():
+ right_index, right_len = self._slabs.nsmallest(1)[0]
+ logger.debug('Right range %s-%s', right_index, right_index + right_len)
+ past_hole_end, _ = self._slabs.nsmallest(1)[0]
+
+ # Now that we have our hole bounds, select a random block from [0:len - block_size_estimate]
+ logger.debug('Selecting from hole range: %s-%s', hole_start, past_hole_end)
+ rand_max_bound = max(hole_start, past_hole_end - block_size_estimate)
+ logger.debug('Rand max bound: %s', rand_max_bound)
+ return random.randint(hole_start, rand_max_bound)
+
+
+def yield_random_entries(batch_query, primary_key_field, batch_size, max_id):
+ """ This method will yield items from random blocks in the database. We will track metadata
+ about which keys are available for work, and we will complete the backfill when there is no
+ more work to be done. The method yields tuples of (candidate, Event), and if the work was
+ already done by another worker, the caller should set the event. Batch candidates must have
+ an "id" field which can be inspected.
+ """
+
+ allocator = CompletedKeys(max_id + 1)
+
+ try:
+ while True:
+ start_index = allocator.get_block_start_index(batch_size)
+ all_candidates = list(batch_query()
+ .limit(batch_size)
+ .where(primary_key_field >= start_index))
+
+ if len(all_candidates) == 0:
+ logger.debug('No candidates, new highest id: %s', start_index)
+ allocator.mark_completed(start_index, max_id)
+ continue
+
+ logger.debug('Found %s candidates, processing block')
+ for candidate in all_candidates:
+ abort_early = Event()
+ yield candidate, abort_early
+ if abort_early.is_set():
+ logger.debug('Overlap with another worker, aborting')
+ break
+
+ completed_through = candidate.id + 1
+ logger.debug('Marking id range as completed: %s-%s', start_index, completed_through)
+ allocator.mark_completed(start_index, completed_through)
+
+ except NoAvailableKeysError:
+ logger.debug('No more work')
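A minimal caller sketch for the new yield_random_entries generator, mirroring how the backfill scripts below consume it; MyModel, its nullable value column, and compute_value are hypothetical placeholders rather than part of this change:

    from peewee import fn

    def batch_query():
      # Rows still needing work; the generator narrows this further with a >= start_index clause.
      return MyModel.select().where(MyModel.value >> None)

    max_id = MyModel.select(fn.Max(MyModel.id)).scalar()

    for candidate, abort_early in yield_random_entries(batch_query, MyModel.id, 1000, max_id):
      if candidate.value is not None:
        # Another worker already handled this row; signal the generator to skip the block.
        abort_early.set()
        continue
      candidate.value = compute_value(candidate)
      candidate.save()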
diff --git a/util/migrate/backfill_content_checksums.py b/util/migrate/backfill_content_checksums.py
index 645b5539e..86770a35b 100644
--- a/util/migrate/backfill_content_checksums.py
+++ b/util/migrate/backfill_content_checksums.py
@@ -3,12 +3,12 @@ import logging
from peewee import JOIN_LEFT_OUTER
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
- TextField)
+ TextField, fn)
from data.database import BaseModel, db, db_for_update, CloseForLongOperation
from app import app, storage
from digest import checksums
-from util.migrate import yield_random_entries
+from util.migrate.allocator import yield_random_entries
logger = logging.getLogger(__name__)
@@ -76,7 +76,9 @@ def backfill_content_checksums():
.select(ImageStorage.id, ImageStorage.uuid)
.where(ImageStorage.content_checksum >> None, ImageStorage.uploading == False))
- for candidate_storage in yield_random_entries(batch_query, 10000, 0.1):
+ max_id = ImageStorage.select(fn.Max(ImageStorage.id)).scalar()
+
+ for candidate_storage, abort in yield_random_entries(batch_query, ImageStorage.id, 1000, max_id):
logger.debug('Computing content checksum for storage: %s', candidate_storage.uuid)
locations = _get_image_storage_locations(candidate_storage.id)
@@ -97,12 +99,13 @@ def backfill_content_checksums():
to_update = db_for_update(ImageStorage.get(ImageStorage.id == candidate_storage.id))
if to_update.content_checksum is not None:
logger.info('Another worker filled in the checksum: %s', candidate_storage.uuid)
+ abort.set()
else:
logger.debug('Setting content checksum to %s for %s', checksum, candidate_storage.uuid)
to_update.content_checksum = checksum
to_update.save()
-if __name__ == "__main__":
+if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
# logging.getLogger('peewee').setLevel(logging.CRITICAL)
backfill_content_checksums()
diff --git a/util/migrate/backfill_parent_id.py b/util/migrate/backfill_parent_id.py
index 1d2cf4136..fc2db3bff 100644
--- a/util/migrate/backfill_parent_id.py
+++ b/util/migrate/backfill_parent_id.py
@@ -1,8 +1,41 @@
import logging
-from data.database import Image, ImageStorage, db, db_for_update
+from data.database import BaseModel, db, db_for_update
+from peewee import (fn, CharField, BigIntegerField, ForeignKeyField, BooleanField, DateTimeField,
+ TextField, IntegerField)
from app import app
-from util.migrate import yield_random_entries
+from util.migrate.allocator import yield_random_entries
+
+class Repository(BaseModel):
+ pass
+
+
+# Vendor the schema of the tables we will be writing to, as of the time of this migration
+class ImageStorage(BaseModel):
+ uuid = CharField(index=True, unique=True)
+ checksum = CharField(null=True)
+ image_size = BigIntegerField(null=True)
+ uncompressed_size = BigIntegerField(null=True)
+ uploading = BooleanField(default=True, null=True)
+ cas_path = BooleanField(default=True)
+ content_checksum = CharField(null=True, index=True)
+
+
+class Image(BaseModel):
+ docker_image_id = CharField(index=True)
+ repository = ForeignKeyField(Repository)
+ ancestors = CharField(index=True, default='/', max_length=64535, null=True)
+ storage = ForeignKeyField(ImageStorage, index=True, null=True)
+ created = DateTimeField(null=True)
+ comment = TextField(null=True)
+ command = TextField(null=True)
+ aggregate_size = BigIntegerField(null=True)
+ v1_json_metadata = TextField(null=True)
+ v1_checksum = CharField(null=True)
+
+ security_indexed = BooleanField(default=False)
+ security_indexed_engine = IntegerField(default=-1)
+ parent_id = IntegerField(index=True, null=True)
logger = logging.getLogger(__name__)
@@ -21,20 +54,23 @@ def backfill_parent_id():
.where(Image.parent_id >> None, Image.ancestors != '/',
ImageStorage.uploading == False))
- for to_backfill in yield_random_entries(fetch_batch, 10000, 0.3):
+ max_id = Image.select(fn.Max(Image.id)).scalar()
+
+ for to_backfill, abort in yield_random_entries(fetch_batch, Image.id, 1000, max_id):
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
image = db_for_update(Image
.select()
- .where(Image.id == to_backfill.id)).get()
+ .where(Image.id == to_backfill.id, Image.parent_id >> None)).get()
image.parent_id = int(to_backfill.ancestors.split('/')[-2])
image.save()
except Image.DoesNotExist:
- pass
+ logger.info('Collision with another worker, aborting batch')
+ abort.set()
logger.debug('backfill_parent_id: Completed')
-if __name__ == "__main__":
+if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('peewee').setLevel(logging.CRITICAL)
diff --git a/util/migrate/backfill_v1_checksums.py b/util/migrate/backfill_v1_checksums.py
index aef7af72e..e9516cf91 100644
--- a/util/migrate/backfill_v1_checksums.py
+++ b/util/migrate/backfill_v1_checksums.py
@@ -1,9 +1,9 @@
import logging
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
- TextField)
+ TextField, fn)
from data.database import BaseModel, db, db_for_update
-from util.migrate import yield_random_entries
+from util.migrate.allocator import yield_random_entries
from app import app
@@ -48,20 +48,22 @@ def backfill_checksums():
.where(Image.v1_checksum >> None, ImageStorage.uploading == False,
~(ImageStorage.checksum >> None)))
- for candidate_image in yield_random_entries(batch_query, 10000, 0.1):
- logger.debug('Computing content checksum for storage: %s', candidate_image.id)
+ max_id = Image.select(fn.Max(Image.id)).scalar()
+ for candidate_image, abort in yield_random_entries(batch_query, Image.id, 1000, max_id):
with app.config['DB_TRANSACTION_FACTORY'](db):
try:
image = db_for_update(Image
.select(Image, ImageStorage)
.join(ImageStorage)
- .where(Image.id == candidate_image.id)).get()
+ .where(Image.id == candidate_image.id,
+ Image.v1_checksum >> None)).get()
image.v1_checksum = image.storage.checksum
image.save()
except Image.DoesNotExist:
- pass
+ logger.info('Collision with another worker, aborting batch')
+ abort.set()
if __name__ == "__main__":
From 5fc7884e938cbe356ad2ad810add60565376e529 Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Mon, 9 Nov 2015 20:53:25 -0500
Subject: [PATCH 16/25] Print the time when logging from migrations
---
alembic.ini | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/alembic.ini b/alembic.ini
index 91fb72bc0..fb3473286 100644
--- a/alembic.ini
+++ b/alembic.ini
@@ -64,5 +64,5 @@ level = NOTSET
formatter = generic
[formatter_generic]
-format = %(levelname)-5.5s [%(name)s] %(message)s
+format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
From a33077b9785351d23844a0b9b6ced619d502b662 Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Tue, 10 Nov 2015 11:10:09 -0500
Subject: [PATCH 17/25] Optimistically update backfill items, reducing RTs
---
util/migrate/backfill_content_checksums.py | 36 +++++++++++++---------
util/migrate/backfill_parent_id.py | 31 +++++++++++--------
util/migrate/backfill_v1_checksums.py | 35 +++++++++++----------
3 files changed, 59 insertions(+), 43 deletions(-)
diff --git a/util/migrate/backfill_content_checksums.py b/util/migrate/backfill_content_checksums.py
index 86770a35b..2b2c0cdca 100644
--- a/util/migrate/backfill_content_checksums.py
+++ b/util/migrate/backfill_content_checksums.py
@@ -5,12 +5,15 @@ from peewee import JOIN_LEFT_OUTER
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
TextField, fn)
-from data.database import BaseModel, db, db_for_update, CloseForLongOperation
+from data.database import BaseModel, CloseForLongOperation
from app import app, storage
from digest import checksums
from util.migrate.allocator import yield_random_entries
+BATCH_SIZE = 1000
+
+
logger = logging.getLogger(__name__)
@@ -69,7 +72,8 @@ def _get_image_storage_locations(storage_id):
def backfill_content_checksums():
""" Copies metadata from image storages to their images. """
- logger.debug('Image content checksum backfill: Began execution')
+ logger.debug('Began execution')
+ logger.debug('This may be a long operation!')
def batch_query():
return (ImageStorage
@@ -78,9 +82,9 @@ def backfill_content_checksums():
max_id = ImageStorage.select(fn.Max(ImageStorage.id)).scalar()
- for candidate_storage, abort in yield_random_entries(batch_query, ImageStorage.id, 1000, max_id):
- logger.debug('Computing content checksum for storage: %s', candidate_storage.uuid)
-
+ written = 0
+ for candidate_storage, abort in yield_random_entries(batch_query, ImageStorage.id, BATCH_SIZE,
+ max_id):
locations = _get_image_storage_locations(candidate_storage.id)
checksum = None
@@ -95,15 +99,19 @@ def backfill_content_checksums():
checksum = 'unknown:{0}'.format(exc.__class__.__name__)
# Now update the ImageStorage with the checksum
- with app.config['DB_TRANSACTION_FACTORY'](db):
- to_update = db_for_update(ImageStorage.get(ImageStorage.id == candidate_storage.id))
- if to_update.content_checksum is not None:
- logger.info('Another worker filled in the checksum: %s', candidate_storage.uuid)
- abort.set()
- else:
- logger.debug('Setting content checksum to %s for %s', checksum, candidate_storage.uuid)
- to_update.content_checksum = checksum
- to_update.save()
+ num_updated = (ImageStorage
+ .update(content_checksum=checksum)
+ .where(ImageStorage.id == candidate_storage.id,
+ ImageStorage.content_checksum >> None)).execute()
+ if num_updated == 0:
+ logger.info('Another worker filled in the checksum: %s', candidate_storage.uuid)
+ abort.set()
+
+ written += num_updated
+ if (written % BATCH_SIZE) == 0:
+ logger.debug('%s entries written', written)
+
+ logger.debug('Completed, %s entries written', written)
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
diff --git a/util/migrate/backfill_parent_id.py b/util/migrate/backfill_parent_id.py
index fc2db3bff..0c1621775 100644
--- a/util/migrate/backfill_parent_id.py
+++ b/util/migrate/backfill_parent_id.py
@@ -1,11 +1,15 @@
import logging
-from data.database import BaseModel, db, db_for_update
+from data.database import BaseModel
from peewee import (fn, CharField, BigIntegerField, ForeignKeyField, BooleanField, DateTimeField,
TextField, IntegerField)
from app import app
from util.migrate.allocator import yield_random_entries
+
+BATCH_SIZE = 1000
+
+
class Repository(BaseModel):
pass
@@ -56,19 +60,20 @@ def backfill_parent_id():
max_id = Image.select(fn.Max(Image.id)).scalar()
- for to_backfill, abort in yield_random_entries(fetch_batch, Image.id, 1000, max_id):
- with app.config['DB_TRANSACTION_FACTORY'](db):
- try:
- image = db_for_update(Image
- .select()
- .where(Image.id == to_backfill.id, Image.parent_id >> None)).get()
- image.parent_id = int(to_backfill.ancestors.split('/')[-2])
- image.save()
- except Image.DoesNotExist:
- logger.info('Collision with another worker, aborting batch')
- abort.set()
+ written = 0
+ for to_backfill, abort in yield_random_entries(fetch_batch, Image.id, BATCH_SIZE, max_id):
+ computed_parent = int(to_backfill.ancestors.split('/')[-2])
+ num_changed = (Image
+ .update(parent_id=computed_parent)
+ .where(Image.id == to_backfill.id, Image.parent_id >> None)).execute()
+ if num_changed == 0:
+ logger.info('Collision with another worker, aborting batch')
+ abort.set()
+ written += num_changed
+ if (written % BATCH_SIZE) == 0:
+ logger.debug('%s entries written', written)
- logger.debug('backfill_parent_id: Completed')
+ logger.debug('backfill_parent_id: Completed, updated %s entries', written)
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
diff --git a/util/migrate/backfill_v1_checksums.py b/util/migrate/backfill_v1_checksums.py
index e9516cf91..0c4e190ae 100644
--- a/util/migrate/backfill_v1_checksums.py
+++ b/util/migrate/backfill_v1_checksums.py
@@ -2,11 +2,14 @@ import logging
from peewee import (CharField, BigIntegerField, BooleanField, ForeignKeyField, DateTimeField,
TextField, fn)
-from data.database import BaseModel, db, db_for_update
+from data.database import BaseModel
from util.migrate.allocator import yield_random_entries
from app import app
+BATCH_SIZE = 1000
+
+
logger = logging.getLogger(__name__)
@@ -40,30 +43,30 @@ class Image(BaseModel):
def backfill_checksums():
""" Copies checksums from image storages to their images. """
- logger.debug('Image v1 checksum backfill: Began execution')
+ logger.debug('Began execution')
+ logger.debug('This may be a long operation!')
def batch_query():
return (Image
- .select(Image.id)
+ .select(Image, ImageStorage)
.join(ImageStorage)
.where(Image.v1_checksum >> None, ImageStorage.uploading == False,
~(ImageStorage.checksum >> None)))
max_id = Image.select(fn.Max(Image.id)).scalar()
- for candidate_image, abort in yield_random_entries(batch_query, Image.id, 1000, max_id):
- with app.config['DB_TRANSACTION_FACTORY'](db):
- try:
- image = db_for_update(Image
- .select(Image, ImageStorage)
- .join(ImageStorage)
- .where(Image.id == candidate_image.id,
- Image.v1_checksum >> None)).get()
+ written = 0
+ for candidate_image, abort in yield_random_entries(batch_query, Image.id, BATCH_SIZE, max_id):
+ num_changed = (Image
+ .update(v1_checksum=candidate_image.storage.checksum)
+ .where(Image.id == candidate_image.id, Image.v1_checksum >> None)).execute()
+ if num_changed == 0:
+ logger.info('Collision with another worker, aborting batch')
+ abort.set()
+ written += num_changed
+ if (written % BATCH_SIZE) == 0:
+ logger.debug('%s entries written', written)
- image.v1_checksum = image.storage.checksum
- image.save()
- except Image.DoesNotExist:
- logger.info('Collision with another worker, aborting batch')
- abort.set()
+ logger.debug('Completed, updated %s entries', written)
if __name__ == "__main__":
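All three scripts above now use the same single-statement compare-and-set; a minimal sketch of the pattern, with a generic peewee model M and a hypothetical nullable value column:

    # One conditional UPDATE replaces the old SELECT ... FOR UPDATE transaction plus save(),
    # so each processed row costs a single database round trip.
    num_changed = (M
                   .update(value=new_value)
                   .where(M.id == row_id, M.value >> None)).execute()
    if num_changed == 0:
      # The WHERE clause matched nothing: another worker filled in the value first.
      abort.set()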
From 112bef8f8ca85bc2bb28dfa11743463d27d4051b Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Tue, 10 Nov 2015 14:01:28 -0500
Subject: [PATCH 18/25] fix bug where v1 backfill never completed
---
util/migrate/backfill_v1_metadata.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/util/migrate/backfill_v1_metadata.py b/util/migrate/backfill_v1_metadata.py
index 28dda6827..e9de7e9a2 100644
--- a/util/migrate/backfill_v1_metadata.py
+++ b/util/migrate/backfill_v1_metadata.py
@@ -104,8 +104,8 @@ def backfill_v1_metadata():
try:
data = storage.get_content(repo_image.storage.locations, json_path)
except IOError:
- data = None
- logger.exception('failed to find v1 metadata, defaulting to None')
+ data = "{}"
+ logger.warning('failed to find v1 metadata, defaulting to {}')
repo_image.v1_json_metadata = data
repo_image.save()
except ImageStoragePlacement.DoesNotExist:
From da72b6a38a24c64531fe7ad845d979c7a0a8139f Mon Sep 17 00:00:00 2001
From: Jimmy Zelinskie
Date: Tue, 10 Nov 2015 14:02:54 -0500
Subject: [PATCH 19/25] cut 1.13.3
---
CHANGELOG.md | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 397831439..588fdbb30 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+### v1.13.3
+
+- Fixed backfill for migration (#846)
+
### v1.13.2
- Fixed 404 API calls redirecting to 404 page (#762)
From 1c6919dd9377e0f6b00c97eab58bcba25e6fad0e Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Tue, 10 Nov 2015 13:33:03 -0500
Subject: [PATCH 20/25] We must fill in the parent_id on linking
---
data/model/image.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/data/model/image.py b/data/model/image.py
index 9001dd13a..c607e8823 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -196,14 +196,21 @@ def _find_or_link_image(existing_image, repo_obj, username, translations, prefer
copied_storage.locations = {placement.location.name
for placement in copied_storage.imagestorageplacement_set}
+ translated_parent_id = None
+ if new_image_ancestry != '/':
+ translated_parent_id = int(new_image_ancestry.split('/')[-2])
+
new_image = Image.create(docker_image_id=existing_image.docker_image_id,
- repository=repo_obj, storage=copied_storage,
+ repository=repo_obj,
+ storage=copied_storage,
ancestors=new_image_ancestry,
command=existing_image.command,
created=existing_image.created,
comment=existing_image.comment,
v1_json_metadata=existing_image.v1_json_metadata,
- aggregate_size=existing_image.aggregate_size)
+ aggregate_size=existing_image.aggregate_size,
+ parent_id=translated_parent_id,
+ v1_checksum=existing_image.v1_checksum)
logger.debug('Storing translation %s -> %s', existing_image.id, new_image.id)
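For reference, a worked example of the parent-id derivation above; the ancestors column is a slash-delimited list of ancestor ids ending at the immediate parent, and the value here is illustrative:

    new_image_ancestry = '/1/4/9/'              # root ... immediate parent, slash-delimited
    new_image_ancestry.split('/')               # ['', '1', '4', '9', '']
    int(new_image_ancestry.split('/')[-2])      # 9, the id of the immediate parent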
From 888ec17538e990f329dfc04c94284fe74d5578ab Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Tue, 10 Nov 2015 15:41:19 -0500
Subject: [PATCH 21/25] Recover by email needs to allow anon access to its
endpoints
---
endpoints/api/user.py | 1 +
endpoints/web.py | 5 ++++-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/endpoints/api/user.py b/endpoints/api/user.py
index 0f21273b3..1801598f7 100644
--- a/endpoints/api/user.py
+++ b/endpoints/api/user.py
@@ -643,6 +643,7 @@ class Recovery(ApiResource):
}
@nickname('requestRecoveryEmail')
+ @anon_allowed
@validate_json_request('RequestRecovery')
def post(self):
""" Request a password recovery email."""
diff --git a/endpoints/web.py b/endpoints/web.py
index ac2bf4dce..0fa19ef9b 100644
--- a/endpoints/web.py
+++ b/endpoints/web.py
@@ -19,7 +19,7 @@ from util.invoice import renderInvoiceToPdf
from util.seo import render_snapshot
from util.cache import no_cache
from endpoints.common import common_login, render_page_template, route_show_if, param_required
-from endpoints.decorators import anon_protect
+from endpoints.decorators import anon_protect, anon_allowed
from endpoints.csrf import csrf_protect, generate_csrf_token, verify_csrf
from buildtrigger.customhandler import CustomBuildTrigger
@@ -366,6 +366,7 @@ def confirm_repo_email():
@web.route('/confirm', methods=['GET'])
@route_show_if(features.MAILING)
+@anon_allowed
def confirm_email():
code = request.values['code']
user = None
@@ -386,6 +387,8 @@ def confirm_email():
@web.route('/recovery', methods=['GET'])
+@route_show_if(features.MAILING)
+@anon_allowed
def confirm_recovery():
code = request.values['code']
user = model.user.validate_reset_code(code)
From a9d6aa4adc6a3c176b4d9a0dd6a6d660cbe48848 Mon Sep 17 00:00:00 2001
From: Matt Jibson
Date: Tue, 10 Nov 2015 16:03:37 -0500
Subject: [PATCH 22/25] Reduce z-index of landing to below search
Also remove unneeded second block.
fixes #844
---
static/css/pages/landing.css | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/static/css/pages/landing.css b/static/css/pages/landing.css
index 5689a967c..48b9d62ab 100644
--- a/static/css/pages/landing.css
+++ b/static/css/pages/landing.css
@@ -85,16 +85,13 @@
}
.landing-content {
- z-index: 4;
+ z-index: 1;
+ text-align: center;
padding-left: 20px;
padding-right: 20px;
position: relative;
}
-.landing-content {
- text-align: center;
-}
-
.landing-content .works-with img {
max-height: 28px;
margin-left: 30px;
From 941d13ea3e144ea5d4e2acf5dd5c1a65574cc209 Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Tue, 10 Nov 2015 16:14:44 -0500
Subject: [PATCH 23/25] Fix an off by one error in the common backfill code
---
test/test_backfill_allocator.py | 27 ++++++++++++++++++++++++++-
util/migrate/allocator.py | 12 ++++++------
2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/test/test_backfill_allocator.py b/test/test_backfill_allocator.py
index 40000cdb5..54258cb35 100644
--- a/test/test_backfill_allocator.py
+++ b/test/test_backfill_allocator.py
@@ -3,7 +3,7 @@ import logging
import random
from datetime import datetime, timedelta
-from util.migrate.allocator import CompletedKeys, NoAvailableKeysError
+from util.migrate.allocator import CompletedKeys, NoAvailableKeysError, yield_random_entries
class CompletedTestCase(unittest.TestCase):
@@ -127,6 +127,31 @@ class CompletedTestCase(unittest.TestCase):
self.assertGreater(iterations, 1024)
+class FakeQuery(object):
+ def __init__(self, result_list):
+ self._result_list = result_list
+
+ def limit(self, *args, **kwargs):
+ return self
+
+ def where(self, *args, **kwargs):
+ return self
+
+ def __iter__(self):
+ return self._result_list.__iter__()
+
+
+class QueryAllocatorTest(unittest.TestCase):
+ FAKE_PK_FIELD = 10 # Must be able to compare to integers
+
+ def test_no_work(self):
+ def create_empty_query():
+ return FakeQuery([])
+
+ for _ in yield_random_entries(create_empty_query, self.FAKE_PK_FIELD, 1, 10):
+ self.fail('There should never be any actual work!')
+
+
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
unittest.main()
diff --git a/util/migrate/allocator.py b/util/migrate/allocator.py
index 999416ace..64fea9ea4 100644
--- a/util/migrate/allocator.py
+++ b/util/migrate/allocator.py
@@ -136,21 +136,21 @@ def yield_random_entries(batch_query, primary_key_field, batch_size, max_id):
.where(primary_key_field >= start_index))
if len(all_candidates) == 0:
- logger.debug('No candidates, new highest id: %s', start_index)
- allocator.mark_completed(start_index, max_id)
+ logger.info('No candidates, new highest id: %s', start_index)
+ allocator.mark_completed(start_index, max_id + 1)
continue
- logger.debug('Found %s candidates, processing block')
+ logger.info('Found %s candidates, processing block')
for candidate in all_candidates:
abort_early = Event()
yield candidate, abort_early
if abort_early.is_set():
- logger.debug('Overlap with another worker, aborting')
+ logger.info('Overlap with another worker, aborting')
break
completed_through = candidate.id + 1
- logger.debug('Marking id range as completed: %s-%s', start_index, completed_through)
+ logger.info('Marking id range as completed: %s-%s', start_index, completed_through)
allocator.mark_completed(start_index, completed_through)
except NoAvailableKeysError:
- logger.debug('No more work')
+ logger.info('No more work')
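A minimal illustration of the boundary being corrected, assuming only the CompletedKeys behaviour introduced earlier in this series; the numbers are illustrative:

    max_id = 10
    allocator = CompletedKeys(max_id + 1)    # key space is [0, 11), i.e. ids 0..10
    allocator.mark_completed(0, max_id)      # old call: the slice [10, 11) is never retired, so
                                             # get_block_start_index() keeps returning an empty
                                             # block and the backfill loop never terminates
    allocator.mark_completed(0, max_id + 1)  # fixed call: the full range is retired and the next
                                             # get_block_start_index() raises NoAvailableKeysError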
From 83c98882bb29b39858d4729578ce56bcf65f225c Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Tue, 10 Nov 2015 19:49:00 -0500
Subject: [PATCH 24/25] Fix the backfill batch message to report the number
---
util/migrate/allocator.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/util/migrate/allocator.py b/util/migrate/allocator.py
index 64fea9ea4..6e531d251 100644
--- a/util/migrate/allocator.py
+++ b/util/migrate/allocator.py
@@ -140,7 +140,7 @@ def yield_random_entries(batch_query, primary_key_field, batch_size, max_id):
allocator.mark_completed(start_index, max_id + 1)
continue
- logger.info('Found %s candidates, processing block')
+ logger.info('Found %s candidates, processing block', len(all_candidates))
for candidate in all_candidates:
abort_early = Event()
yield candidate, abort_early
From 88bbf34993c83a935c21dd42dbb119cbad6e0209 Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Tue, 10 Nov 2015 19:49:23 -0500
Subject: [PATCH 25/25] Silence a lot of the useless logs for the checksum
backfill
---
util/migrate/backfill_content_checksums.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/util/migrate/backfill_content_checksums.py b/util/migrate/backfill_content_checksums.py
index 2b2c0cdca..c03bb13b6 100644
--- a/util/migrate/backfill_content_checksums.py
+++ b/util/migrate/backfill_content_checksums.py
@@ -115,5 +115,7 @@ def backfill_content_checksums():
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
- # logging.getLogger('peewee').setLevel(logging.CRITICAL)
+ logging.getLogger('peewee').setLevel(logging.WARNING)
+ logging.getLogger('boto').setLevel(logging.WARNING)
+ logging.getLogger('data.database').setLevel(logging.WARNING)
backfill_content_checksums()