Merge pull request #2693 from coreos-inc/gc-query-opt

Optimize GC query join a bit by reducing the surface
This commit is contained in:
josephschorr 2017-06-12 13:37:55 -04:00 committed by GitHub
commit 4fd2047e30
2 changed files with 43 additions and 6 deletions

View file

@ -194,17 +194,21 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
# directly referenced by a tag. This can be used in a subquery to directly
# find which candidates are being referenced without any client side
# computation or extra round trips.
direct_referenced = (RepositoryTag
.select(RepositoryTag.image)
.where(RepositoryTag.repository == repo.id,
RepositoryTag.image << candidates_orphans))
cloned = direct_referenced.clone().alias('direct_ref')
directly_referenced_subquery = Image.alias().select(cloned.c.image_id).from_(cloned)
ancestor_referenced = (Candidate
.select(Candidate.id)
.join(Tagged, on=ancestor_superset)
.join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
.where(RepositoryTag.repository == repo.id,
Candidate.id << candidates_orphans))
direct_referenced = (RepositoryTag
.select(RepositoryTag.image)
.where(RepositoryTag.repository == repo.id,
RepositoryTag.image << candidates_orphans))
Candidate.id << candidates_orphans,
~(Candidate.id << directly_referenced_subquery)))
referenced_candidates = (direct_referenced | ancestor_referenced)

View file

@ -314,38 +314,71 @@ class TestGarbageCollection(unittest.TestCase):
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
# Current state:
# latest -> i3->i2->i1
# other -> f2->f1->i1
# third -> t3->t2->t1
# fourth -> f1->i1
# Delete tag other. Should delete f2, since it is not shared.
self.deleteTag(repository, 'other')
self.assertDeleted(repository, 'f2')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
# Current state:
# latest -> i3->i2->i1
# third -> t3->t2->t1
# fourth -> f1->i1
# Move tag fourth to i3. This should remove f1 since it is no longer referenced.
self.moveTag(repository, 'fourth', 'i3')
self.assertDeleted(repository, 'f1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Current state:
# latest -> i3->i2->i1
# third -> t3->t2->t1
# fourth -> i3->i2->i1
# Delete tag 'latest'. This should do nothing since fourth is on the same branch.
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Current state:
# third -> t3->t2->t1
# fourth -> i3->i2->i1
# Delete tag 'third'. This should remove t1->t3.
self.deleteTag(repository, 'third')
self.assertDeleted(repository, 't1', 't2', 't3')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Current state:
# fourth -> i3->i2->i1
# Add tag to i1.
self.moveTag(repository, 'newtag', 'i1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Current state:
# fourth -> i3->i2->i1
# newtag -> i1
# Delete tag 'fourth'. This should remove i2 and i3.
self.deleteTag(repository, 'fourth')
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1')
# Current state:
# newtag -> i1
# Delete tag 'newtag'. This should remove the remaining image.
self.deleteTag(repository, 'newtag')
self.assertDeleted(repository, 'i1')
# Current state:
# (Empty)
def test_empty_gc(self):
with self.assert_gc_integrity(expect_storage_removed=False):
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],