Merge pull request #2693 from coreos-inc/gc-query-opt
Optimize GC query join a bit by reducing the surface
This commit is contained in:
commit
4fd2047e30
2 changed files with 43 additions and 6 deletions
|
@ -194,17 +194,21 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
||||||
# directly referenced by a tag. This can be used in a subquery to directly
|
# directly referenced by a tag. This can be used in a subquery to directly
|
||||||
# find which candidates are being referenced without any client side
|
# find which candidates are being referenced without any client side
|
||||||
# computation or extra round trips.
|
# computation or extra round trips.
|
||||||
|
direct_referenced = (RepositoryTag
|
||||||
|
.select(RepositoryTag.image)
|
||||||
|
.where(RepositoryTag.repository == repo.id,
|
||||||
|
RepositoryTag.image << candidates_orphans))
|
||||||
|
|
||||||
|
cloned = direct_referenced.clone().alias('direct_ref')
|
||||||
|
directly_referenced_subquery = Image.alias().select(cloned.c.image_id).from_(cloned)
|
||||||
|
|
||||||
ancestor_referenced = (Candidate
|
ancestor_referenced = (Candidate
|
||||||
.select(Candidate.id)
|
.select(Candidate.id)
|
||||||
.join(Tagged, on=ancestor_superset)
|
.join(Tagged, on=ancestor_superset)
|
||||||
.join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
|
.join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
|
||||||
.where(RepositoryTag.repository == repo.id,
|
.where(RepositoryTag.repository == repo.id,
|
||||||
Candidate.id << candidates_orphans))
|
Candidate.id << candidates_orphans,
|
||||||
|
~(Candidate.id << directly_referenced_subquery)))
|
||||||
direct_referenced = (RepositoryTag
|
|
||||||
.select(RepositoryTag.image)
|
|
||||||
.where(RepositoryTag.repository == repo.id,
|
|
||||||
RepositoryTag.image << candidates_orphans))
|
|
||||||
|
|
||||||
referenced_candidates = (direct_referenced | ancestor_referenced)
|
referenced_candidates = (direct_referenced | ancestor_referenced)
|
||||||
|
|
||||||
|
|
|
@ -314,38 +314,71 @@ class TestGarbageCollection(unittest.TestCase):
|
||||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
||||||
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
|
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# latest -> i3->i2->i1
|
||||||
|
# other -> f2->f1->i1
|
||||||
|
# third -> t3->t2->t1
|
||||||
|
# fourth -> f1->i1
|
||||||
|
|
||||||
# Delete tag other. Should delete f2, since it is not shared.
|
# Delete tag other. Should delete f2, since it is not shared.
|
||||||
self.deleteTag(repository, 'other')
|
self.deleteTag(repository, 'other')
|
||||||
self.assertDeleted(repository, 'f2')
|
self.assertDeleted(repository, 'f2')
|
||||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
|
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# latest -> i3->i2->i1
|
||||||
|
# third -> t3->t2->t1
|
||||||
|
# fourth -> f1->i1
|
||||||
|
|
||||||
# Move tag fourth to i3. This should remove f1 since it is no longer referenced.
|
# Move tag fourth to i3. This should remove f1 since it is no longer referenced.
|
||||||
self.moveTag(repository, 'fourth', 'i3')
|
self.moveTag(repository, 'fourth', 'i3')
|
||||||
self.assertDeleted(repository, 'f1')
|
self.assertDeleted(repository, 'f1')
|
||||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# latest -> i3->i2->i1
|
||||||
|
# third -> t3->t2->t1
|
||||||
|
# fourth -> i3->i2->i1
|
||||||
|
|
||||||
# Delete tag 'latest'. This should do nothing since fourth is on the same branch.
|
# Delete tag 'latest'. This should do nothing since fourth is on the same branch.
|
||||||
self.deleteTag(repository, 'latest')
|
self.deleteTag(repository, 'latest')
|
||||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# third -> t3->t2->t1
|
||||||
|
# fourth -> i3->i2->i1
|
||||||
|
|
||||||
# Delete tag 'third'. This should remove t1, t2 and t3.
|
# Delete tag 'third'. This should remove t1, t2 and t3.
|
||||||
self.deleteTag(repository, 'third')
|
self.deleteTag(repository, 'third')
|
||||||
self.assertDeleted(repository, 't1', 't2', 't3')
|
self.assertDeleted(repository, 't1', 't2', 't3')
|
||||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
|
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# fourth -> i3->i2->i1
|
||||||
|
|
||||||
# Add tag to i1.
|
# Add tag to i1.
|
||||||
self.moveTag(repository, 'newtag', 'i1')
|
self.moveTag(repository, 'newtag', 'i1')
|
||||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
|
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# fourth -> i3->i2->i1
|
||||||
|
# newtag -> i1
|
||||||
|
|
||||||
# Delete tag 'fourth'. This should remove i2 and i3.
|
# Delete tag 'fourth'. This should remove i2 and i3.
|
||||||
self.deleteTag(repository, 'fourth')
|
self.deleteTag(repository, 'fourth')
|
||||||
self.assertDeleted(repository, 'i2', 'i3')
|
self.assertDeleted(repository, 'i2', 'i3')
|
||||||
self.assertNotDeleted(repository, 'i1')
|
self.assertNotDeleted(repository, 'i1')
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# newtag -> i1
|
||||||
|
|
||||||
# Delete tag 'newtag'. This should remove the remaining image.
|
# Delete tag 'newtag'. This should remove the remaining image.
|
||||||
self.deleteTag(repository, 'newtag')
|
self.deleteTag(repository, 'newtag')
|
||||||
self.assertDeleted(repository, 'i1')
|
self.assertDeleted(repository, 'i1')
|
||||||
|
|
||||||
|
# Current state:
|
||||||
|
# (Empty)
|
||||||
|
|
||||||
def test_empty_gc(self):
|
def test_empty_gc(self):
|
||||||
with self.assert_gc_integrity(expect_storage_removed=False):
|
with self.assert_gc_integrity(expect_storage_removed=False):
|
||||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
||||||
|
|
Reference in a new issue