From 0b5cd9569336727f120641ee724a1c57a6142f53 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 3 Aug 2016 15:03:02 -0400 Subject: [PATCH] Have repo deletion not lock all the things --- data/database.py | 30 +++++++++++++++++++++++++++++- test/test_api_usage.py | 11 +++++++---- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/data/database.py b/data/database.py index a866aa161..53a53b520 100644 --- a/data/database.py +++ b/data/database.py @@ -46,6 +46,32 @@ def real_for_update(query): def null_for_update(query): return query +def _clear_log_entries(instance, model_class, log_entry_field, *extra_fields): + """ Temporary method for clearing out LogEntry records before the general deletion of a + dependent record (e.g. Repository or User). Used because deleting all of the log entries + under a transaction destroys the database for all other work. + """ + # TODO: Remove this hack once LogEntry no longer has foreign key constraints to everything. + # Delete all records in LogEntry that reference the instance. + where_clause = (log_entry_field == instance) + for field in extra_fields: + where_clause = where_clause | (field == instance) + + # Delete the log entries in chunks of 1000. + deleted_count = 1 + while deleted_count >= 1: + todelete = (LogEntry + .select(LogEntry.id) + .where(where_clause) + .limit(1000) + .alias('todelete')) + + subquery = (LogEntry + .select(todelete.c.id) + .from_(todelete)) + + deleted_count = LogEntry.delete().where(LogEntry.id << subquery).execute() + def delete_instance_filtered(instance, model_class, delete_nullable, skip_transitive_deletes): """ Deletes the DB instance recursively, skipping any models in the skip_transitive_deletes set. @@ -88,6 +114,7 @@ def delete_instance_filtered(instance, model_class, delete_nullable, skip_transi with db_transaction(): for query, fk in filtered_ops: model = fk.model_class + if fk.null and not delete_nullable: model.update(**{fk.name: None}).where(query).execute() else: @@ -320,6 +347,7 @@ class User(BaseModel): # These models don't need to use transitive deletes, because the referenced objects # are cleaned up directly skip_transitive_deletes = {Image} + _clear_log_entries(self, User, LogEntry.performer, LogEntry.account) delete_instance_filtered(self, User, delete_nullable, skip_transitive_deletes) @@ -416,7 +444,7 @@ class Repository(BaseModel): # are cleaned up directly skip_transitive_deletes = {RepositoryTag, RepositoryBuild, RepositoryBuildTrigger, BlobUpload, Image, TagManifest, DerivedStorageForImage} - + _clear_log_entries(self, User, LogEntry.repository) delete_instance_filtered(self, Repository, delete_nullable, skip_transitive_deletes) diff --git a/test/test_api_usage.py b/test/test_api_usage.py index d0cf90589..842f07e5b 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -1654,14 +1654,17 @@ class log_queries(object): class check_transitive_deletes(log_queries): - def __init__(self): + def __init__(self, allowed_query_check=None): super(check_transitive_deletes, self).__init__(query_filter=r'^DELETE.+IN \(SELECT.+$') + self.allowed_query_check = allowed_query_check def __exit__(self, exc_type, exc_val, exc_tb): super(check_transitive_deletes, self).__exit__(exc_type, exc_val, exc_tb) queries = self.get_queries() - if queries: - raise Exception('Detected transitive deletion in queries: %s' % queries) + if queries and self.allowed_query_check: + for query in queries: + if query.find(self.allowed_query_check) < 0: + raise Exception('Detected transitive deletion in queries: %s' % queries) class TestDeleteRepository(ApiTestCase): @@ -1747,7 +1750,7 @@ class TestDeleteRepository(ApiTestCase): date=datetime.datetime.now() - datetime.timedelta(days=5), count=6) # Delete the repository. - with check_transitive_deletes(): + with check_transitive_deletes(allowed_query_check='todelete'): self.deleteResponse(Repository, params=dict(repository=self.COMPLEX_REPO)) # Verify the repo was deleted.