diff --git a/data/model/repository.py b/data/model/repository.py index e94b8b4fa..0b0d4ac6d 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -194,17 +194,21 @@ def garbage_collect_repo(repo, extra_candidate_set=None): # directly referenced by a tag. This can be used in a subquery to directly # find which candidates are being referenced without any client side # computation or extra round trips. + direct_referenced = (RepositoryTag + .select(RepositoryTag.image) + .where(RepositoryTag.repository == repo.id, + RepositoryTag.image << candidates_orphans)) + + cloned = direct_referenced.clone().alias('direct_ref') + directly_referenced_subquery = Image.alias().select(cloned.c.image_id).from_(cloned) + ancestor_referenced = (Candidate .select(Candidate.id) .join(Tagged, on=ancestor_superset) .join(RepositoryTag, on=(Tagged.id == RepositoryTag.image)) .where(RepositoryTag.repository == repo.id, - Candidate.id << candidates_orphans)) - - direct_referenced = (RepositoryTag - .select(RepositoryTag.image) - .where(RepositoryTag.repository == repo.id, - RepositoryTag.image << candidates_orphans)) + Candidate.id << candidates_orphans, + ~(Candidate.id << directly_referenced_subquery))) referenced_candidates = (direct_referenced | ancestor_referenced) diff --git a/test/test_gc.py b/test/test_gc.py index 1e5369675..a7085af04 100644 --- a/test/test_gc.py +++ b/test/test_gc.py @@ -314,38 +314,71 @@ class TestGarbageCollection(unittest.TestCase): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], third=['t1', 't2', 't3'], fourth=['i1', 'f1']) + # Current state: + # latest -> i3->i2->i1 + # other -> f2->f1->i1 + # third -> t3->t2->t1 + # fourth -> f1->i1 + # Delete tag other. Should delete f2, since it is not shared. self.deleteTag(repository, 'other') self.assertDeleted(repository, 'f2') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') + # Current state: + # latest -> i3->i2->i1 + # third -> t3->t2->t1 + # fourth -> f1->i1 + # Move tag fourth to i3. This should remove f1 since it is no longer referenced. self.moveTag(repository, 'fourth', 'i3') self.assertDeleted(repository, 'f1') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Current state: + # latest -> i3->i2->i1 + # third -> t3->t2->t1 + # fourth -> i3->i2->i1 + # Delete tag 'latest'. This should do nothing since fourth is on the same branch. self.deleteTag(repository, 'latest') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Current state: + # third -> t3->t2->t1 + # fourth -> i3->i2->i1 + # Delete tag 'third'. This should remove t1->t3. self.deleteTag(repository, 'third') self.assertDeleted(repository, 't1', 't2', 't3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Current state: + # fourth -> i3->i2->i1 + # Add tag to i1. self.moveTag(repository, 'newtag', 'i1') self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Current state: + # fourth -> i3->i2->i1 + # newtag -> i1 + # Delete tag 'fourth'. This should remove i2 and i3. self.deleteTag(repository, 'fourth') self.assertDeleted(repository, 'i2', 'i3') self.assertNotDeleted(repository, 'i1') + # Current state: + # newtag -> i1 + # Delete tag 'newtag'. This should remove the remaining image. self.deleteTag(repository, 'newtag') self.assertDeleted(repository, 'i1') + # Current state: + # (Empty) + def test_empty_gc(self): with self.assert_gc_integrity(expect_storage_removed=False): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],