Merge pull request #2257 from coreos-inc/clair-gc-take2
feat(gc): Garbage collection for security scanning
This commit is contained in:
commit
aafcb592a6
8 changed files with 194 additions and 62 deletions
|
@ -107,6 +107,10 @@ class Config(object):
|
|||
# Sets up an empty Config: no application config, no store, and no registered
# image cleanup callbacks. The attributes are filled in later by other code.
def __init__(self):
|
||||
# Unset until assigned by code not shown in this view.
self.app_config = None
|
||||
# Unset until assigned; NOTE(review): the storage GC code in this diff calls
# config.store.remove(...), so presumably this is the storage engine - confirm.
self.store = None
|
||||
# Callbacks added through register_image_cleanup_callback, kept in
# registration order.
self.image_cleanup_callbacks = []
|
||||
|
||||
# Appends `callback` to this Config's image_cleanup_callbacks list. No
# de-duplication is done, so registering the same callable twice stores it twice.
# NOTE(review): the repository GC code in this diff calls each registered
# callback with one argument, the list of removed image rows whose storage was
# also removed - presumably on this same Config instance; confirm.
def register_image_cleanup_callback(self, callback):
|
||||
self.image_cleanup_callbacks.append(callback)
|
||||
|
||||
|
||||
# Module-level Config instance, created when this module is imported.
config = Config()
|
||||
|
|
|
@ -5,9 +5,9 @@ from datetime import timedelta, datetime
|
|||
from peewee import JOIN_LEFT_OUTER, fn, SQL, IntegrityError
|
||||
from cachetools import ttl_cache
|
||||
|
||||
from data.model import (DataModelException, tag, db_transaction, storage, permission,
|
||||
from data.model import (config, DataModelException, tag, db_transaction, storage, permission,
|
||||
_basequery)
|
||||
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User,
|
||||
from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User,
|
||||
Visibility, RepositoryPermission, RepositoryActionCount,
|
||||
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
|
||||
Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
|
||||
|
@ -173,29 +173,24 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
|
||||
referenced_candidates = (direct_referenced | ancestor_referenced)
|
||||
|
||||
# We desire two pieces of information from the database from the following
|
||||
# We desire a few pieces of information from the database from the following
|
||||
# query: all of the image ids which are associated with this repository,
|
||||
# and the storages which are associated with those images. In order to
|
||||
# fetch just this information, and bypass all of the peewee model parsing
|
||||
# code, which is overkill for just two fields, we use a tuple query, and
|
||||
# feed that directly to the dictionary tuple constructor which takes an
|
||||
# iterable of tuples containing [(k, v), (k, v), ...]
|
||||
# and the storages which are associated with those images.
|
||||
unreferenced_candidates = (Image
|
||||
.select(Image.id, Image.storage)
|
||||
.select(Image.id, Image.docker_image_id,
|
||||
ImageStorage.id, ImageStorage.uuid)
|
||||
.join(ImageStorage)
|
||||
.where(Image.id << candidates_orphans,
|
||||
~(Image.id << referenced_candidates))
|
||||
.tuples())
|
||||
~(Image.id << referenced_candidates)))
|
||||
|
||||
unreferecend_images_to_storages = dict(unreferenced_candidates)
|
||||
to_remove = unreferecend_images_to_storages.keys()
|
||||
|
||||
if len(to_remove) > 0:
|
||||
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
||||
storage_id_whitelist = set(unreferecend_images_to_storages.values())
|
||||
image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates]
|
||||
if len(image_ids_to_remove) > 0:
|
||||
logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove)
|
||||
storage_id_whitelist = set([candidate.storage_id for candidate in unreferenced_candidates])
|
||||
|
||||
# Lookup any derived images for the images to remove.
|
||||
derived = DerivedStorageForImage.select().where(
|
||||
DerivedStorageForImage.source_image << to_remove)
|
||||
DerivedStorageForImage.source_image << image_ids_to_remove)
|
||||
|
||||
has_derived = False
|
||||
for derived_image in derived:
|
||||
|
@ -207,21 +202,30 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
try:
|
||||
(DerivedStorageForImage
|
||||
.delete()
|
||||
.where(DerivedStorageForImage.source_image << to_remove)
|
||||
.where(DerivedStorageForImage.source_image << image_ids_to_remove)
|
||||
.execute())
|
||||
except IntegrityError:
|
||||
logger.info('Could not GC derived images %s; will try again soon', to_remove)
|
||||
logger.info('Could not GC derived images %s; will try again soon', image_ids_to_remove)
|
||||
return False
|
||||
|
||||
try:
|
||||
Image.delete().where(Image.id << to_remove).execute()
|
||||
Image.delete().where(Image.id << image_ids_to_remove).execute()
|
||||
except IntegrityError:
|
||||
logger.info('Could not GC images %s; will try again soon', to_remove)
|
||||
logger.info('Could not GC images %s; will try again soon', image_ids_to_remove)
|
||||
return False
|
||||
|
||||
if len(to_remove) > 0:
|
||||
logger.info('Garbage collecting storage for images: %s', to_remove)
|
||||
storage.garbage_collect_storage(storage_id_whitelist)
|
||||
# If any images were removed, GC any orphaned storages.
|
||||
if len(image_ids_to_remove) > 0:
|
||||
logger.info('Garbage collecting storage for images: %s', image_ids_to_remove)
|
||||
storage_ids_removed = set(storage.garbage_collect_storage(storage_id_whitelist))
|
||||
|
||||
# If any storages were removed and cleanup callbacks are registered, call them with
|
||||
# the images+storages removed.
|
||||
if storage_ids_removed and config.image_cleanup_callbacks:
|
||||
image_storages_removed = [candidate for candidate in unreferenced_candidates
|
||||
if candidate.storage_id in storage_ids_removed]
|
||||
for callback in config.image_cleanup_callbacks:
|
||||
callback(image_storages_removed)
|
||||
|
||||
return True
|
||||
|
||||
|
|
|
@ -72,8 +72,12 @@ def _orphaned_storage_query(candidate_ids):
|
|||
|
||||
|
||||
def garbage_collect_storage(storage_id_whitelist):
|
||||
""" Performs GC on a possible subset of the storages with the IDs found in the
|
||||
whitelist. The storages in the whitelist will be checked, and any orphaned will
|
||||
be removed, with those IDs being returned.
|
||||
"""
|
||||
if len(storage_id_whitelist) == 0:
|
||||
return
|
||||
return []
|
||||
|
||||
def placements_query_to_paths_set(placements_query):
|
||||
return {(get_image_location_for_id(placement.location_id).name,
|
||||
|
@ -89,7 +93,7 @@ def garbage_collect_storage(storage_id_whitelist):
|
|||
orphaned_storage_ids = _orphaned_storage_query(storage_id_whitelist)
|
||||
if len(orphaned_storage_ids) == 0:
|
||||
# Nothing to GC.
|
||||
return
|
||||
return []
|
||||
|
||||
placements_to_remove = list(ImageStoragePlacement
|
||||
.select()
|
||||
|
@ -133,6 +137,8 @@ def garbage_collect_storage(storage_id_whitelist):
|
|||
logger.debug('Removing %s from %s', image_path, location_name)
|
||||
config.store.remove({location_name}, image_path)
|
||||
|
||||
return orphaned_storage_ids
|
||||
|
||||
|
||||
def create_v1_storage(location_name):
|
||||
storage = ImageStorage.create(cas_path=False)
|
||||
|
|
Reference in a new issue