First stab at time machine using a fixed two-week expiration policy.

This commit is contained in:
Jake Moshenko 2015-02-11 11:54:30 -05:00
parent 0f3d87466e
commit 90c0a9c1e0
6 changed files with 78 additions and 43 deletions

View file

@ -185,5 +185,8 @@ class DefaultConfig(object):
LOG_ARCHIVE_LOCATION = 'local_us' LOG_ARCHIVE_LOCATION = 'local_us'
LOG_ARCHIVE_PATH = 'logarchive/' LOG_ARCHIVE_PATH = 'logarchive/'
# Number of seconds to keep expired tags before garbage collection deletes them
TIME_MACHINE_DELTA_SECONDS = 14 * 24 * 60 * 60
# For enterprise: # For enterprise:
MAXIMUM_REPOSITORY_USAGE = 20 MAXIMUM_REPOSITORY_USAGE = 20

View file

@ -456,12 +456,14 @@ class RepositoryTag(BaseModel):
name = CharField() name = CharField()
image = ForeignKeyField(Image) image = ForeignKeyField(Image)
repository = ForeignKeyField(Repository) repository = ForeignKeyField(Repository)
lifetime_start = DateTimeField(default=datetime.utcnow)
lifetime_end = DateTimeField(null=True)
class Meta: class Meta:
database = db database = db
read_slaves = (read_slave,) read_slaves = (read_slave,)
indexes = ( indexes = (
(('repository', 'name'), True), (('repository', 'name'), False),
) )

View file

@ -106,12 +106,15 @@ class TooManyLoginAttemptsException(Exception):
self.retry_after = retry_after self.retry_after = retry_after
def _get_repository(namespace_name, repository_name): def _get_repository(namespace_name, repository_name, for_update=False):
return (Repository query = (Repository
.select(Repository, Namespace) .select(Repository, Namespace)
.join(Namespace, on=(Repository.namespace_user == Namespace.id)) .join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Namespace.username == namespace_name, Repository.name == repository_name) .where(Namespace.username == namespace_name, Repository.name == repository_name))
.get()) if for_update:
query = db_for_update(query)
return query.get()
def hash_password(password, salt=None): def hash_password(password, salt=None):
@ -1534,12 +1537,27 @@ def list_repository_tags(namespace_name, repository_name):
.join(Namespace, on=(Repository.namespace_user == Namespace.id)) .join(Namespace, on=(Repository.namespace_user == Namespace.id))
.switch(RepositoryTag) .switch(RepositoryTag)
.join(Image) .join(Image)
.where(Repository.name == repository_name, Namespace.username == namespace_name)) .where(Repository.name == repository_name, Namespace.username == namespace_name,
RepositoryTag.lifetime_end >> None))
def _garbage_collect_tags(namespace_name, repository_name):
with config.app_config['DB_TRANSACTION_FACTORY'](db):
repo = _get_repository(namespace_name, repository_name)
collect_time = (datetime.utcnow() -
timedelta(seconds=config.app_config['TIME_MACHINE_DELTA_SECONDS']))
(RepositoryTag
.delete()
.where(RepositoryTag.repository == repo, RepositoryTag.lifetime_end < collect_time)
.execute())
def garbage_collect_repository(namespace_name, repository_name): def garbage_collect_repository(namespace_name, repository_name):
storage_id_whitelist = {} storage_id_whitelist = {}
_garbage_collect_tags(namespace_name, repository_name)
with config.app_config['DB_TRANSACTION_FACTORY'](db): with config.app_config['DB_TRANSACTION_FACTORY'](db):
# TODO (jake): We could probably select this and all the images in a single query using # TODO (jake): We could probably select this and all the images in a single query using
# a different kind of join. # a different kind of join.
@ -1573,12 +1591,12 @@ def garbage_collect_repository(namespace_name, repository_name):
if len(to_remove) > 0: if len(to_remove) > 0:
logger.info('Garbage collecting storage for images: %s', to_remove) logger.info('Garbage collecting storage for images: %s', to_remove)
garbage_collect_storage(storage_id_whitelist) _garbage_collect_storage(storage_id_whitelist)
return len(to_remove) return len(to_remove)
def garbage_collect_storage(storage_id_whitelist): def _garbage_collect_storage(storage_id_whitelist):
if len(storage_id_whitelist) == 0: if len(storage_id_whitelist) == 0:
return return
@ -1710,45 +1728,57 @@ def get_parent_images(namespace_name, repository_name, image_obj):
def create_or_update_tag(namespace_name, repository_name, tag_name, def create_or_update_tag(namespace_name, repository_name, tag_name,
tag_docker_image_id): tag_docker_image_id):
try:
repo = _get_repository(namespace_name, repository_name)
except Repository.DoesNotExist:
raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name))
try: with config.app_config['DB_TRANSACTION_FACTORY'](db):
image = Image.get(Image.docker_image_id == tag_docker_image_id, Image.repository == repo) try:
except Image.DoesNotExist: repo = _get_repository(namespace_name, repository_name)
raise DataModelException('Invalid image with id: %s' % tag_docker_image_id) except Repository.DoesNotExist:
raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name))
try: try:
tag = RepositoryTag.get(RepositoryTag.repository == repo, RepositoryTag.name == tag_name) image = Image.get(Image.docker_image_id == tag_docker_image_id, Image.repository == repo)
tag.image = image except Image.DoesNotExist:
tag.save() raise DataModelException('Invalid image with id: %s' % tag_docker_image_id)
except RepositoryTag.DoesNotExist:
tag = RepositoryTag.create(repository=repo, image=image, name=tag_name)
return tag now = datetime.utcnow()
try:
# When we move a tag, we really end the timeline of the old one and create a new one
tag = RepositoryTag.get(RepositoryTag.repository == repo, RepositoryTag.name == tag_name,
RepositoryTag.lifetime_end >> None)
tag.lifetime_end = now
tag.save()
except RepositoryTag.DoesNotExist:
# No tag that needs to be ended
pass
tag = RepositoryTag.create(repository=repo, image=image, name=tag_name, lifetime_start=now)
return tag
def delete_tag(namespace_name, repository_name, tag_name): def delete_tag(namespace_name, repository_name, tag_name):
try: with config.app_config['DB_TRANSACTION_FACTORY'](db):
found = (RepositoryTag try:
.select() query = (RepositoryTag
.join(Repository) .select(RepositoryTag, Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id)) .join(Repository)
.where(Repository.name == repository_name, Namespace.username == namespace_name, .join(Namespace, on=(Repository.namespace_user == Namespace.id))
RepositoryTag.name == tag_name) .where(Repository.name == repository_name, Namespace.username == namespace_name,
.get()) RepositoryTag.name == tag_name, RepositoryTag.lifetime_end >> None))
found = db_for_update(query).get()
except RepositoryTag.DoesNotExist: except RepositoryTag.DoesNotExist:
msg = ('Invalid repository tag \'%s\' on repository \'%s/%s\'' % msg = ('Invalid repository tag \'%s\' on repository \'%s/%s\'' %
(tag_name, namespace_name, repository_name)) (tag_name, namespace_name, repository_name))
raise DataModelException(msg) raise DataModelException(msg)
found.delete_instance() found.lifetime_end = datetime.utcnow()
found.save()
def delete_all_repository_tags(namespace_name, repository_name): def purge_all_repository_tags(namespace_name, repository_name):
""" Immediately purge all repository tags without respecting the lifeline procedure """
try: try:
repo = _get_repository(namespace_name, repository_name) repo = _get_repository(namespace_name, repository_name)
except Repository.DoesNotExist: except Repository.DoesNotExist:
@ -1863,7 +1893,7 @@ def set_team_repo_permission(team_name, namespace_name, repository_name,
def purge_repository(namespace_name, repository_name): def purge_repository(namespace_name, repository_name):
# Delete all tags to allow gc to reclaim storage # Delete all tags to allow gc to reclaim storage
delete_all_repository_tags(namespace_name, repository_name) purge_all_repository_tags(namespace_name, repository_name)
# Gc to remove the images and storage # Gc to remove the images and storage
garbage_collect_repository(namespace_name, repository_name) garbage_collect_repository(namespace_name, repository_name)

View file

@ -54,8 +54,8 @@ class RepositoryTag(RepositoryParamResource):
username = get_authenticated_user().username username = get_authenticated_user().username
log_action('move_tag' if original_image_id else 'create_tag', namespace, log_action('move_tag' if original_image_id else 'create_tag', namespace,
{ 'username': username, 'repo': repository, 'tag': tag, {'username': username, 'repo': repository, 'tag': tag,
'image': image_id, 'original_image': original_image_id }, 'image': image_id, 'original_image': original_image_id},
repo=model.get_repository(namespace, repository)) repo=model.get_repository(namespace, repository))
return 'Updated', 201 return 'Updated', 201

Binary file not shown.

View file

@ -207,7 +207,7 @@ class TestGarbageColection(unittest.TestCase):
self.assertNotDeleted(repository,'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') self.assertNotDeleted(repository,'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
def test_gc_storage_empty(self): def test_gc_storage_empty(self):
model.garbage_collect_storage(set()) model._garbage_collect_storage(set())
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()