Merge pull request #2693 from coreos-inc/gc-query-opt

Optimize the GC query join a bit by reducing its surface: candidates already directly referenced by a tag are excluded from the ancestor-reference join
This commit is contained in:
josephschorr 2017-06-12 13:37:55 -04:00 committed by GitHub
commit 4fd2047e30
2 changed files with 43 additions and 6 deletions

View file

@ -194,17 +194,21 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
# directly referenced by a tag. This can be used in a subquery to directly # directly referenced by a tag. This can be used in a subquery to directly
# find which candidates are being referenced without any client side # find which candidates are being referenced without any client side
# computation or extra round trips. # computation or extra round trips.
direct_referenced = (RepositoryTag
.select(RepositoryTag.image)
.where(RepositoryTag.repository == repo.id,
RepositoryTag.image << candidates_orphans))
cloned = direct_referenced.clone().alias('direct_ref')
directly_referenced_subquery = Image.alias().select(cloned.c.image_id).from_(cloned)
ancestor_referenced = (Candidate ancestor_referenced = (Candidate
.select(Candidate.id) .select(Candidate.id)
.join(Tagged, on=ancestor_superset) .join(Tagged, on=ancestor_superset)
.join(RepositoryTag, on=(Tagged.id == RepositoryTag.image)) .join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
.where(RepositoryTag.repository == repo.id, .where(RepositoryTag.repository == repo.id,
Candidate.id << candidates_orphans)) Candidate.id << candidates_orphans,
~(Candidate.id << directly_referenced_subquery)))
direct_referenced = (RepositoryTag
.select(RepositoryTag.image)
.where(RepositoryTag.repository == repo.id,
RepositoryTag.image << candidates_orphans))
referenced_candidates = (direct_referenced | ancestor_referenced) referenced_candidates = (direct_referenced | ancestor_referenced)

View file

@ -314,38 +314,71 @@ class TestGarbageCollection(unittest.TestCase):
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1']) third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
# Current state:
# latest -> i3->i2->i1
# other -> f2->f1->i1
# third -> t3->t2->t1
# fourth -> f1->i1
# Delete tag other. Should delete f2, since it is not shared. # Delete tag other. Should delete f2, since it is not shared.
self.deleteTag(repository, 'other') self.deleteTag(repository, 'other')
self.assertDeleted(repository, 'f2') self.assertDeleted(repository, 'f2')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
# Current state:
# latest -> i3->i2->i1
# third -> t3->t2->t1
# fourth -> f1->i1
# Move tag fourth to i3. This should remove f1 since it is no longer referenced. # Move tag fourth to i3. This should remove f1 since it is no longer referenced.
self.moveTag(repository, 'fourth', 'i3') self.moveTag(repository, 'fourth', 'i3')
self.assertDeleted(repository, 'f1') self.assertDeleted(repository, 'f1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Current state:
# latest -> i3->i2->i1
# third -> t3->t2->t1
# fourth -> i3->i2->i1
# Delete tag 'latest'. This should do nothing since fourth is on the same branch. # Delete tag 'latest'. This should do nothing since fourth is on the same branch.
self.deleteTag(repository, 'latest') self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Current state:
# third -> t3->t2->t1
# fourth -> i3->i2->i1
# Delete tag 'third'. This should remove t1, t2 and t3. # Delete tag 'third'. This should remove t1, t2 and t3.
self.deleteTag(repository, 'third') self.deleteTag(repository, 'third')
self.assertDeleted(repository, 't1', 't2', 't3') self.assertDeleted(repository, 't1', 't2', 't3')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Current state:
# fourth -> i3->i2->i1
# Add tag to i1. # Add tag to i1.
self.moveTag(repository, 'newtag', 'i1') self.moveTag(repository, 'newtag', 'i1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Current state:
# fourth -> i3->i2->i1
# newtag -> i1
# Delete tag 'fourth'. This should remove i2 and i3. # Delete tag 'fourth'. This should remove i2 and i3.
self.deleteTag(repository, 'fourth') self.deleteTag(repository, 'fourth')
self.assertDeleted(repository, 'i2', 'i3') self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1') self.assertNotDeleted(repository, 'i1')
# Current state:
# newtag -> i1
# Delete tag 'newtag'. This should remove the remaining image. # Delete tag 'newtag'. This should remove the remaining image.
self.deleteTag(repository, 'newtag') self.deleteTag(repository, 'newtag')
self.assertDeleted(repository, 'i1') self.assertDeleted(repository, 'i1')
# Current state:
# (Empty)
def test_empty_gc(self): def test_empty_gc(self):
with self.assert_gc_integrity(expect_storage_removed=False): with self.assert_gc_integrity(expect_storage_removed=False):
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],