diff --git a/config.py b/config.py index 39a257b15..47f836587 100644 --- a/config.py +++ b/config.py @@ -185,5 +185,8 @@ class DefaultConfig(object): LOG_ARCHIVE_LOCATION = 'local_us' LOG_ARCHIVE_PATH = 'logarchive/' + # Number of revisions to keep expired tags + TIME_MACHINE_DELTA_SECONDS = 14 * 24 * 60 * 60 + # For enterprise: MAXIMUM_REPOSITORY_USAGE = 20 diff --git a/data/database.py b/data/database.py index eb804688e..7d027aa71 100644 --- a/data/database.py +++ b/data/database.py @@ -456,12 +456,14 @@ class RepositoryTag(BaseModel): name = CharField() image = ForeignKeyField(Image) repository = ForeignKeyField(Repository) + lifetime_start = DateTimeField(default=datetime.utcnow) + lifetime_end = DateTimeField(null=True) class Meta: database = db read_slaves = (read_slave,) indexes = ( - (('repository', 'name'), True), + (('repository', 'name'), False), ) diff --git a/data/model/legacy.py b/data/model/legacy.py index 9bae8cd32..6d3809ccb 100644 --- a/data/model/legacy.py +++ b/data/model/legacy.py @@ -106,12 +106,15 @@ class TooManyLoginAttemptsException(Exception): self.retry_after = retry_after -def _get_repository(namespace_name, repository_name): - return (Repository - .select(Repository, Namespace) - .join(Namespace, on=(Repository.namespace_user == Namespace.id)) - .where(Namespace.username == namespace_name, Repository.name == repository_name) - .get()) +def _get_repository(namespace_name, repository_name, for_update=False): + query = (Repository + .select(Repository, Namespace) + .join(Namespace, on=(Repository.namespace_user == Namespace.id)) + .where(Namespace.username == namespace_name, Repository.name == repository_name)) + if for_update: + query = db_for_update(query) + + return query.get() def hash_password(password, salt=None): @@ -1534,12 +1537,27 @@ def list_repository_tags(namespace_name, repository_name): .join(Namespace, on=(Repository.namespace_user == Namespace.id)) .switch(RepositoryTag) .join(Image) - .where(Repository.name == repository_name, Namespace.username == namespace_name)) + .where(Repository.name == repository_name, Namespace.username == namespace_name, + RepositoryTag.lifetime_end >> None)) + + +def _garbage_collect_tags(namespace_name, repository_name): + with config.app_config['DB_TRANSACTION_FACTORY'](db): + repo = _get_repository(namespace_name, repository_name) + collect_time = (datetime.utcnow() - + timedelta(seconds=config.app_config['TIME_MACHINE_DELTA_SECONDS'])) + + (RepositoryTag + .delete() + .where(RepositoryTag.repository == repo, RepositoryTag.lifetime_end < collect_time) + .execute()) def garbage_collect_repository(namespace_name, repository_name): storage_id_whitelist = {} + _garbage_collect_tags(namespace_name, repository_name) + with config.app_config['DB_TRANSACTION_FACTORY'](db): # TODO (jake): We could probably select this and all the images in a single query using # a different kind of join. @@ -1573,12 +1591,12 @@ def garbage_collect_repository(namespace_name, repository_name): if len(to_remove) > 0: logger.info('Garbage collecting storage for images: %s', to_remove) - garbage_collect_storage(storage_id_whitelist) + _garbage_collect_storage(storage_id_whitelist) return len(to_remove) -def garbage_collect_storage(storage_id_whitelist): +def _garbage_collect_storage(storage_id_whitelist): if len(storage_id_whitelist) == 0: return @@ -1710,45 +1728,57 @@ def get_parent_images(namespace_name, repository_name, image_obj): def create_or_update_tag(namespace_name, repository_name, tag_name, tag_docker_image_id): - try: - repo = _get_repository(namespace_name, repository_name) - except Repository.DoesNotExist: - raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) - try: - image = Image.get(Image.docker_image_id == tag_docker_image_id, Image.repository == repo) - except Image.DoesNotExist: - raise DataModelException('Invalid image with id: %s' % tag_docker_image_id) + with config.app_config['DB_TRANSACTION_FACTORY'](db): + try: + repo = _get_repository(namespace_name, repository_name) + except Repository.DoesNotExist: + raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) - try: - tag = RepositoryTag.get(RepositoryTag.repository == repo, RepositoryTag.name == tag_name) - tag.image = image - tag.save() - except RepositoryTag.DoesNotExist: - tag = RepositoryTag.create(repository=repo, image=image, name=tag_name) + try: + image = Image.get(Image.docker_image_id == tag_docker_image_id, Image.repository == repo) + except Image.DoesNotExist: + raise DataModelException('Invalid image with id: %s' % tag_docker_image_id) - return tag + now = datetime.utcnow() + + try: + # When we move a tag, we really end the timeline of the old one and create a new one + tag = RepositoryTag.get(RepositoryTag.repository == repo, RepositoryTag.name == tag_name, + RepositoryTag.lifetime_end >> None) + tag.lifetime_end = now + tag.save() + except RepositoryTag.DoesNotExist: + # No tag that needs to be ended + pass + + tag = RepositoryTag.create(repository=repo, image=image, name=tag_name, lifetime_start=now) + + return tag def delete_tag(namespace_name, repository_name, tag_name): - try: - found = (RepositoryTag - .select() - .join(Repository) - .join(Namespace, on=(Repository.namespace_user == Namespace.id)) - .where(Repository.name == repository_name, Namespace.username == namespace_name, - RepositoryTag.name == tag_name) - .get()) + with config.app_config['DB_TRANSACTION_FACTORY'](db): + try: + query = (RepositoryTag + .select(RepositoryTag, Repository) + .join(Repository) + .join(Namespace, on=(Repository.namespace_user == Namespace.id)) + .where(Repository.name == repository_name, Namespace.username == namespace_name, + RepositoryTag.name == tag_name, RepositoryTag.lifetime_end >> None)) + found = db_for_update(query).get() - except RepositoryTag.DoesNotExist: - msg = ('Invalid repository tag \'%s\' on repository \'%s/%s\'' % - (tag_name, namespace_name, repository_name)) - raise DataModelException(msg) + except RepositoryTag.DoesNotExist: + msg = ('Invalid repository tag \'%s\' on repository \'%s/%s\'' % + (tag_name, namespace_name, repository_name)) + raise DataModelException(msg) - found.delete_instance() + found.lifetime_end = datetime.utcnow() + found.save() -def delete_all_repository_tags(namespace_name, repository_name): +def purge_all_repository_tags(namespace_name, repository_name): + """ Immediately purge all repository tags without respecting the lifeline procedure """ try: repo = _get_repository(namespace_name, repository_name) except Repository.DoesNotExist: @@ -1863,7 +1893,7 @@ def set_team_repo_permission(team_name, namespace_name, repository_name, def purge_repository(namespace_name, repository_name): # Delete all tags to allow gc to reclaim storage - delete_all_repository_tags(namespace_name, repository_name) + purge_all_repository_tags(namespace_name, repository_name) # Gc to remove the images and storage garbage_collect_repository(namespace_name, repository_name) diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index 581e02a60..21972fc19 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -54,8 +54,8 @@ class RepositoryTag(RepositoryParamResource): username = get_authenticated_user().username log_action('move_tag' if original_image_id else 'create_tag', namespace, - { 'username': username, 'repo': repository, 'tag': tag, - 'image': image_id, 'original_image': original_image_id }, + {'username': username, 'repo': repository, 'tag': tag, + 'image': image_id, 'original_image': original_image_id}, repo=model.get_repository(namespace, repository)) return 'Updated', 201 diff --git a/test/data/test.db b/test/data/test.db index c37aec119..1e810ea24 100644 Binary files a/test/data/test.db and b/test/data/test.db differ diff --git a/test/test_gc.py b/test/test_gc.py index 0ad8ce7bb..5a5444f2a 100644 --- a/test/test_gc.py +++ b/test/test_gc.py @@ -207,7 +207,7 @@ class TestGarbageColection(unittest.TestCase): self.assertNotDeleted(repository,'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') def test_gc_storage_empty(self): - model.garbage_collect_storage(set()) + model._garbage_collect_storage(set()) if __name__ == '__main__': unittest.main()