From 2e7835c3726112217b5efa87a5768fff9fa22477 Mon Sep 17 00:00:00 2001
From: Joseph Schorr <josephschorr@users.noreply.github.com>
Date: Thu, 17 Dec 2015 12:29:44 -0500
Subject: [PATCH] Fix user deletion under MySQL

Fixes #973
---
 data/database.py | 95 +++++++++++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 37 deletions(-)

diff --git a/data/database.py b/data/database.py
index eb7cf8ed3..6a7d6c102 100644
--- a/data/database.py
+++ b/data/database.py
@@ -41,6 +41,56 @@ def real_for_update(query):
 def null_for_update(query):
   return query
 
+def delete_instance_filtered(instance, model_class, delete_nullable, skip_transitive_deletes):
+  """ Deletes the DB instance recursively, skipping any models in the skip_transitive_deletes set.
+
+      Callers *must* ensure that any models listed in the skip_transitive_deletes must be capable
+      of being directly deleted when the instance is deleted (with automatic sorting handling
+      dependency order).
+
+      For example, the RepositoryTag and Image tables for Repository will always refer to the
+      *same* repository when RepositoryTag references Image, so we can safely skip
+      transitive deletion for the RepositoryTag table.
+  """
+  # We need to sort the ops so that models get cleaned in order of their dependencies
+  ops = reversed(list(instance.dependencies(delete_nullable)))
+  filtered_ops = []
+
+  dependencies = defaultdict(set)
+
+  for query, fk in ops:
+    # We only want to skip transitive deletes, which are done using subqueries in the form of
+    # DELETE FROM <table> in <subquery>. If an op is not using a subquery, we allow it to be
+    # applied directly.
+    if fk.model_class not in skip_transitive_deletes or query.op != 'in':
+      filtered_ops.append((query, fk))
+
+    if query.op == 'in':
+      dependencies[fk.model_class.__name__].add(query.rhs.model_class.__name__)
+    elif query.op == '=':
+      dependencies[fk.model_class.__name__].add(model_class.__name__)
+    else:
+      raise RuntimeError('Unknown operator in recursive repository delete query')
+
+  sorted_models = list(reversed(toposort.toposort_flatten(dependencies)))
+  def sorted_model_key(query_fk_tuple):
+    cmp_query, cmp_fk = query_fk_tuple
+    if cmp_query.op == 'in':
+      return -1
+    return sorted_models.index(cmp_fk.model_class.__name__)
+  filtered_ops.sort(key=sorted_model_key)
+
+  with db_transaction():
+    for query, fk in filtered_ops:
+      model = fk.model_class
+      if fk.null and not delete_nullable:
+        model.update(**{fk.name: None}).where(query).execute()
+      else:
+        model.delete().where(query).execute()
+
+    return instance.delete().where(instance._pk_expr()).execute()
+
+
 SCHEME_SPECIALIZED_FOR_UPDATE = {
   'sqlite': null_for_update,
 }
@@ -255,7 +305,13 @@ class User(BaseModel):
       # Delete the instance itself.
       super(User, self).delete_instance(recursive=False, delete_nullable=False)
     else:
-      super(User, self).delete_instance(recursive=recursive, delete_nullable=delete_nullable)
+      if not recursive:
+        raise RuntimeError('Non-recursive delete on user.')
+
+      # These models don't need to use transitive deletes, because the referenced objects
+      # are cleaned up directly
+      skip_transitive_deletes = {Image}
+      delete_instance_filtered(self, User, delete_nullable, skip_transitive_deletes)
 
 
 Namespace = User.alias()
@@ -352,43 +408,8 @@ class Repository(BaseModel):
     skip_transitive_deletes = {RepositoryTag, RepositoryBuild, RepositoryBuildTrigger, BlobUpload,
                                Image}
 
-    # We need to sort the ops so that models get cleaned in order of their dependencies
-    ops = reversed(list(self.dependencies(delete_nullable)))
-    filtered_ops = []
+    delete_instance_filtered(self, Repository, delete_nullable, skip_transitive_deletes)
 
-    dependencies = defaultdict(set)
-
-    for query, fk in ops:
-      # We only want to skip transitive deletes, which are done using subqueries in the form of
-      # DELETE FROM <table> in <subquery>. If an op is not using a subquery, we allow it to be
-      # applied directly.
-      if fk.model_class not in skip_transitive_deletes or query.op != 'in':
-        filtered_ops.append((query, fk))
-
-      if query.op == 'in':
-        dependencies[fk.model_class.__name__].add(query.rhs.model_class.__name__)
-      elif query.op == '=':
-        dependencies[fk.model_class.__name__].add(Repository.__name__)
-      else:
-        raise RuntimeError('Unknown operator in recursive repository delete query')
-
-    sorted_models = list(reversed(toposort.toposort_flatten(dependencies)))
-    def sorted_model_key(query_fk_tuple):
-      cmp_query, cmp_fk = query_fk_tuple
-      if cmp_query.op == 'in':
-        return -1
-      return sorted_models.index(cmp_fk.model_class.__name__)
-    filtered_ops.sort(key=sorted_model_key)
-
-    with db_transaction():
-      for query, fk in filtered_ops:
-        model = fk.model_class
-        if fk.null and not delete_nullable:
-          model.update(**{fk.name: None}).where(query).execute()
-        else:
-          model.delete().where(query).execute()
-
-      return self.delete().where(self._pk_expr()).execute()
 
 class Star(BaseModel):
   user = ForeignKeyField(User, index=True)