Add GC of layers in Clair
Fixes https://www.pivotaltracker.com/story/show/135583207
This commit is contained in:
		
							parent
							
								
									0aa6e6cd58
								
							
						
					
					
						commit
						49872838ab
					
				
					 5 changed files with 57 additions and 23 deletions
				
			
		
							
								
								
									
										1
									
								
								app.py
									
										
									
									
									
								
							
							
						
						
									
										1
									
								
								app.py
									
										
									
									
									
								
							|  | @ -236,6 +236,7 @@ else: | |||
| database.configure(app.config) | ||||
| model.config.app_config = app.config | ||||
| model.config.store = storage | ||||
| model.config.secscan_api = secscan_api | ||||
| 
 | ||||
| @login_manager.user_loader | ||||
| def load_user(user_uuid): | ||||
|  |  | |||
|  | @ -107,6 +107,7 @@ class Config(object): | |||
|   def __init__(self): | ||||
|     self.app_config = None | ||||
|     self.store = None | ||||
|     self.secscan_api = None | ||||
| 
 | ||||
| 
 | ||||
| config = Config() | ||||
|  |  | |||
|  | @ -6,8 +6,8 @@ from peewee import JOIN_LEFT_OUTER, fn, SQL, IntegrityError | |||
| from cachetools import ttl_cache | ||||
| 
 | ||||
| from data.model import (DataModelException, tag, db_transaction, storage, permission, | ||||
|                         _basequery) | ||||
| from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User, | ||||
|                         _basequery, config) | ||||
| from data.database import (Repository, Namespace, RepositoryTag, Star, Image, User, ImageStorage, | ||||
|                            Visibility, RepositoryPermission, RepositoryActionCount, | ||||
|                            Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage, | ||||
|                            Label, TagManifestLabel, db_for_update, get_epoch_timestamp, | ||||
|  | @ -173,29 +173,26 @@ def garbage_collect_repo(repo, extra_candidate_set=None): | |||
| 
 | ||||
|     referenced_candidates = (direct_referenced | ancestor_referenced) | ||||
| 
 | ||||
|     # We desire two pieces of information from the database from the following | ||||
|     # We desire three pieces of information from the database from the following | ||||
|     # query: all of the image ids which are associated with this repository, | ||||
|     # and the storages which are associated with those images. In order to | ||||
|     # fetch just this information, and bypass all of the peewee model parsing | ||||
|     # code, which is overkill for just two fields, we use a tuple query, and | ||||
|     # feed that directly to the dictionary tuple constructor which takes an | ||||
|     # iterable of tuples containing [(k, v), (k, v), ...] | ||||
|     # the storages which are associated with those images and their docker IDs. | ||||
|     unreferenced_candidates = (Image | ||||
|                                .select(Image.id, Image.storage) | ||||
|                                .select(Image.id, Image.storage, Image.docker_image_id, ImageStorage) | ||||
|                                .join(ImageStorage) | ||||
|                                .where(Image.id << candidates_orphans, | ||||
|                                       ~(Image.id << referenced_candidates)) | ||||
|                                .tuples()) | ||||
|                                       ~(Image.id << referenced_candidates))) | ||||
| 
 | ||||
|     unreferecend_images_to_storages = dict(unreferenced_candidates) | ||||
|     to_remove = unreferecend_images_to_storages.keys() | ||||
|     tuples = [(image.id, image.storage_id) for image in unreferenced_candidates] | ||||
|     unreferenced_images_to_storage_ids = dict(tuples) | ||||
|     image_ids_to_remove = unreferenced_images_to_storage_ids.keys() | ||||
| 
 | ||||
|     if len(to_remove) > 0: | ||||
|       logger.info('Cleaning up unreferenced images: %s', to_remove) | ||||
|       storage_id_whitelist = set(unreferecend_images_to_storages.values()) | ||||
|     if len(image_ids_to_remove) > 0: | ||||
|       logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove) | ||||
|       storage_id_whitelist = set(unreferenced_images_to_storage_ids.values()) | ||||
| 
 | ||||
|       # Lookup any derived images for the images to remove. | ||||
|       derived = DerivedStorageForImage.select().where( | ||||
|         DerivedStorageForImage.source_image << to_remove) | ||||
|         DerivedStorageForImage.source_image << image_ids_to_remove) | ||||
| 
 | ||||
|       has_derived = False | ||||
|       for derived_image in derived: | ||||
|  | @ -207,21 +204,22 @@ def garbage_collect_repo(repo, extra_candidate_set=None): | |||
|         try: | ||||
|           (DerivedStorageForImage | ||||
|            .delete() | ||||
|            .where(DerivedStorageForImage.source_image << to_remove) | ||||
|            .where(DerivedStorageForImage.source_image << image_ids_to_remove) | ||||
|            .execute()) | ||||
|         except IntegrityError: | ||||
|           logger.info('Could not GC derived images %s; will try again soon', to_remove) | ||||
|           logger.info('Could not GC derived images %s; will try again soon', image_ids_to_remove) | ||||
|           return False | ||||
| 
 | ||||
|       try: | ||||
|         Image.delete().where(Image.id << to_remove).execute() | ||||
|         Image.delete().where(Image.id << image_ids_to_remove).execute() | ||||
|       except IntegrityError: | ||||
|         logger.info('Could not GC images %s; will try again soon', to_remove) | ||||
|         logger.info('Could not GC images %s; will try again soon', image_ids_to_remove) | ||||
|         return False | ||||
| 
 | ||||
|   if len(to_remove) > 0: | ||||
|     logger.info('Garbage collecting storage for images: %s', to_remove) | ||||
|   if len(image_ids_to_remove) > 0: | ||||
|     logger.info('Garbage collecting storage for images: %s', image_ids_to_remove) | ||||
|     storage.garbage_collect_storage(storage_id_whitelist) | ||||
|     config.secscan_api.delete_layers(unreferenced_candidates) | ||||
| 
 | ||||
|   return True | ||||
| 
 | ||||
|  |  | |||
|  | @ -547,6 +547,19 @@ class TestSecurityScanner(unittest.TestCase): | |||
|     notification = model.notification.create_repo_notification(repo, 'vulnerability_found', 'quay_notification', {}, {'level': 0}) | ||||
|     self.assertFalse(VulnerabilityFoundEvent().should_perform(event_data, notification)) | ||||
| 
 | ||||
|   def test_garbage_collection(self): | ||||
|     requests_made = set() | ||||
| 
 | ||||
|     @urlmatch(netloc=r'(.*\.)?mockclairservice', path=r'/v1/layers/(.+)') | ||||
|     def delete_layer(url, request): | ||||
|       requests_made.add(request.method) | ||||
| 
 | ||||
|     with HTTMock(delete_layer): | ||||
|       # Purge a repository. | ||||
|       model.repository.purge_repository('devtable', 'simple') | ||||
| 
 | ||||
|     # Ensure that DELETE is called (and only DELETE) for the layers in the security scanner. | ||||
|     self.assertEquals(set(['DELETE']), requests_made) | ||||
| 
 | ||||
|   def test_notification_worker(self): | ||||
|     pages_called = [] | ||||
|  |  | |||
|  | @ -28,6 +28,7 @@ class APIRequestFailure(Exception): | |||
| 
 | ||||
| _API_METHOD_INSERT = 'layers' | ||||
| _API_METHOD_GET_LAYER = 'layers/%s' | ||||
| _API_METHOD_DELETE_LAYER = 'layers/%s' | ||||
| _API_METHOD_MARK_NOTIFICATION_READ = 'notifications/%s' | ||||
| _API_METHOD_GET_NOTIFICATION = 'notifications/%s' | ||||
| _API_METHOD_PING = 'metrics' | ||||
|  | @ -122,6 +123,26 @@ class SecurityScannerAPI(object): | |||
|       'Layer': layer_request, | ||||
|     } | ||||
| 
 | ||||
|   def delete_layers(self, layers): | ||||
|     """ Deletes the given layers in the security scanner. """ | ||||
|     if self._config is None: | ||||
|       # Not configured. | ||||
|       return | ||||
| 
 | ||||
|     for layer in layers: | ||||
|       layer_id = '%s.%s' % (layer.docker_image_id, layer.storage.uuid) | ||||
|       self.delete_layer(layer_id) | ||||
| 
 | ||||
|   def delete_layer(self, layer_id): | ||||
|     """ Calls DELETE on the layer with the given ID in the security scanner, removing it from | ||||
|         its database. | ||||
|     """ | ||||
|     try: | ||||
|       response = self._call('DELETE', _API_METHOD_DELETE_LAYER % layer_id) | ||||
|       return response.status_code / 100 == 2 | ||||
|     except requests.exceptions.RequestException: | ||||
|       logger.exception('Failed to delete layer: %s', layer_id) | ||||
|       return False | ||||
| 
 | ||||
|   def ping(self): | ||||
|     """ Calls GET on the metrics endpoint of the security scanner to ensure it is running | ||||
|  |  | |||
		Reference in a new issue