Make namespace deletion asynchronous

Instead of deleting a namespace synchronously as before, we now mark the namespace for deletion, disable it, and rename it. A worker then comes along and deletes the namespace in the background. This results in a *significantly* better user experience, as the namespace deletion operation now "completes" in under a second, whereas before it could take tens of minutes in the worst case.

Fixes https://jira.coreos.com/browse/QUAY-838
This commit is contained in:
Joseph Schorr 2018-02-23 16:45:16 -05:00
parent d9015a1863
commit 8bc55a5676
21 changed files with 244 additions and 129 deletions

View file

@ -470,13 +470,21 @@ class User(BaseModel):
RepositoryNotification, OAuthAuthorizationCode,
RepositoryActionCount, TagManifestLabel, Tag,
ManifestLabel, BlobUploading, TeamSync,
RepositorySearchScore} | beta_classes
RepositorySearchScore, DeletedNamespace} | beta_classes
delete_instance_filtered(self, User, delete_nullable, skip_transitive_deletes)
Namespace = User.alias()
class DeletedNamespace(BaseModel):
  """ Marker row recording that a namespace has been marked for deletion and is awaiting
      background cleanup.
  """
  # The namespace (user row) being deleted. Declared unique, so a namespace can have at most
  # one marker. NOTE(review): allows_robots=False presumably rejects robot accounts -- confirm
  # against QuayUserField.
  namespace = QuayUserField(index=True, allows_robots=False, unique=True)

  # When the marker row was created; defaults to datetime.now at insert time.
  marked = DateTimeField(default=datetime.now)

  # The username and e-mail address the namespace had when it was marked, kept here because
  # the namespace's own username/email fields are overwritten as part of marking.
  original_username = CharField(index=True)
  original_email = CharField(index=True)

  # ID of the queue item created to delete the namespace; null until one has been assigned.
  queue_id = CharField(null=True, index=True)
class UserPromptTypes(object):
CONFIRM_USERNAME = 'confirm_username'
ENTER_NAME = 'enter_name'

View file

@ -0,0 +1,39 @@
"""Add deleted namespace table
Revision ID: b4c2d45bc132
Revises: 152edccba18c
Create Date: 2018-02-27 11:43:02.329941
"""
# revision identifiers, used by Alembic.
revision = 'b4c2d45bc132'
down_revision = '152edccba18c'
from alembic import op
import sqlalchemy as sa
def upgrade(tables):
  """ Creates the `deletednamespace` table and its four indexes.

      `tables` is accepted for signature compatibility with the other migrations and is not
      used here.
  """
  # Column definitions, in the order they appear in the table.
  columns = [
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('namespace_id', sa.Integer(), nullable=False),
    sa.Column('marked', sa.DateTime(), nullable=False),
    sa.Column('original_username', sa.String(length=255), nullable=False),
    sa.Column('original_email', sa.String(length=255), nullable=False),
    sa.Column('queue_id', sa.String(length=255), nullable=True),
  ]

  # Table-level constraints: the marker references the `user` table via namespace_id.
  constraints = [
    sa.ForeignKeyConstraint(['namespace_id'], ['user.id'],
                            name=op.f('fk_deletednamespace_namespace_id_user')),
    sa.PrimaryKeyConstraint('id', name=op.f('pk_deletednamespace')),
  ]

  op.create_table('deletednamespace', *(columns + constraints))

  # (index name, indexed column, unique?) -- only the namespace_id index is unique.
  indexes = (
    ('deletednamespace_namespace_id', 'namespace_id', True),
    ('deletednamespace_original_email', 'original_email', False),
    ('deletednamespace_original_username', 'original_username', False),
    ('deletednamespace_queue_id', 'queue_id', False),
  )
  for index_name, column_name, is_unique in indexes:
    op.create_index(index_name, 'deletednamespace', [column_name], unique=is_unique)
def downgrade(tables):
  """ Reverts the migration by dropping the `deletednamespace` table.

      `tables` is accepted for signature compatibility and is not used here.
  """
  marker_table = 'deletednamespace'
  op.drop_table(marker_table)

View file

@ -4,9 +4,11 @@ import pytest
from mock import patch
from data.database import EmailConfirmation
from data.database import EmailConfirmation, User, DeletedNamespace
from data.model.user import create_user_noverify, validate_reset_code, get_active_users
from data.model.user import mark_namespace_for_deletion, delete_namespace_via_marker
from util.timedeltastring import convert_to_timedelta
from data.queue import WorkQueue
from test.fixtures import *
def test_create_user_with_expiration(initialized_db):
@ -38,3 +40,46 @@ def test_get_active_users(disabled, initialized_db):
for user in users:
if not disabled:
assert user.enabled
def test_mark_namespace_for_deletion(initialized_db):
  """ Marking a namespace for deletion keeps the old row (under a different username) and
      frees the original username for reuse.
  """
  gc_queue = WorkQueue('testgcnamespace', lambda db: db.transaction())

  # Create the user that will be marked.
  original = create_user_noverify('foobar', 'foo@example.com', email_required=False)
  original_id = original.id

  # Mark it for deletion; no other queues need to be cleaned up.
  mark_namespace_for_deletion(original, [], gc_queue)

  # The row must still exist, but no longer under its original username.
  renamed = User.get(id=original_id)
  assert renamed.username != 'foobar'

  # The username (and e-mail) must now be available to a brand new user.
  replacement = create_user_noverify('foobar', 'foo@example.com', email_required=False)
  assert replacement.id != original_id

  # Creating the replacement must not have touched the marked row.
  still_renamed = User.get(id=original_id)
  assert still_renamed.username != 'foobar'
def test_delete_namespace_via_marker(initialized_db):
  """ Deleting via the marker removes both the user row and the marker row itself. """
  gc_queue = WorkQueue('testgcnamespace', lambda db: db.transaction())

  # Create a user and mark it for deletion, keeping hold of the marker's ID.
  doomed = create_user_noverify('foobar', 'foo@example.com', email_required=False)
  doomed_id = doomed.id
  marker_id = mark_namespace_for_deletion(doomed, [], gc_queue)

  # Perform the actual deletion, as the background worker would.
  delete_namespace_via_marker(marker_id, [])

  # The user row must be gone...
  with pytest.raises(User.DoesNotExist):
    User.get(id=doomed_id)

  # ...and so must the marker that pointed at it.
  with pytest.raises(DeletedNamespace.DoesNotExist):
    DeletedNamespace.get(id=marker_id)

View file

@ -14,7 +14,7 @@ from data.database import (User, LoginService, FederatedLogin, RepositoryPermiss
EmailConfirmation, Role, db_for_update, random_string_generator,
UserRegion, ImageStorageLocation,
ServiceKeyApproval, OAuthApplication, RepositoryBuildTrigger,
UserPromptKind, UserPrompt, UserPromptTypes)
UserPromptKind, UserPrompt, UserPromptTypes, DeletedNamespace)
from data.model import (DataModelException, InvalidPasswordException, InvalidRobotException,
InvalidUsernameException, InvalidEmailAddressException,
TooManyLoginAttemptsException, db_transaction,
@ -835,7 +835,14 @@ def get_solely_admined_organizations(user_obj):
return solely_admined
def delete_user(user, queues, force=False):
def mark_namespace_for_deletion(user, queues, namespace_gc_queue, force=False):
""" Marks a namespace (as referenced by the given user) for deletion. A queue item will be added
to delete the namespace's repositories and storage, while the namespace itself will be
renamed, disabled, and delinked from other tables.
"""
if not user.enabled:
return None
if not force and not user.organization:
# Ensure that the user is not the sole admin for any organizations. If so, then the user
# cannot be deleted before those organizations are deleted or reassigned.
@ -854,10 +861,66 @@ def delete_user(user, queues, force=False):
for queue in queues:
queue.delete_namespaced_items(user.username)
# Delete non-repository related items. This operation is very quick, so we can do so here.
_delete_user_linked_data(user)
with db_transaction():
original_username = user.username
user = db_for_update(User.select().where(User.id == user.id)).get()
# Mark the namespace as deleted and ready for GC.
try:
marker = DeletedNamespace.create(namespace=user,
original_username=original_username,
original_email=user.email)
except IntegrityError:
return
# Disable the namespace itself, and replace its various unique fields with UUIDs.
user.enabled = False
user.username = str(uuid4())
user.email = str(uuid4())
user.save()
# Add a queueitem to delete the namespace.
marker.queue_id = namespace_gc_queue.put([str(user.id)], json.dumps({
'marker_id': marker.id,
'original_username': original_username,
}))
marker.save()
return marker.id
def delete_namespace_via_marker(marker_id, queues):
  """ Deletes the namespace that the DeletedNamespace marker with the given ID points to.

      `queues` is passed through unchanged to delete_user. If no marker with that ID exists,
      nothing is done.
  """
  try:
    deletion_marker = DeletedNamespace.get(id=marker_id)
  except DeletedNamespace.DoesNotExist:
    # No such marker (for example, it has already been processed): nothing to delete.
    deletion_marker = None

  if deletion_marker is not None:
    delete_user(deletion_marker.namespace, queues)
def delete_user(user, queues):
  """ Fully deletes a user/organization/robot along with everything under its namespace.

      This must *not* be invoked from any user-facing API: callers there should use
      mark_namespace_for_deletion, and let the queue call this function.
  """
  # Drop every queue item namespaced under the user.
  for namespaced_queue in queues:
    namespaced_queue.delete_namespaced_items(user.username)

  # Purge each repository in the user's namespace. The query is materialized up front since
  # rows are being removed while we walk the results.
  owned_repos = list(Repository.select().where(Repository.namespace_user == user))
  for owned_repo in owned_repos:
    repository.purge_repository(user.username, owned_repo.name)

  # Remove the data linked to the user that is not tied to a repository.
  _delete_user_linked_data(user)

  # Finally, remove the user row itself.
  user.delete_instance(recursive=True, delete_nullable=True)
def _delete_user_linked_data(user):
if user.organization:
# Delete the organization's teams.
for team in Team.select().where(Team.organization == user):
@ -879,9 +942,6 @@ def delete_user(user, queues, force=False):
# falling and only occurs if a superuser is being deleted.
ServiceKeyApproval.update(approver=None).where(ServiceKeyApproval.approver == user).execute()
# Delete the user itself.
user.delete_instance(recursive=True, delete_nullable=True)
def get_pull_credentials(robotname):
try: