diff --git a/workers/securityworker.py b/workers/securityworker.py index 5a377ec95..82a35a5b7 100644 --- a/workers/securityworker.py +++ b/workers/securityworker.py @@ -32,44 +32,63 @@ def _get_image_to_export(version): # Without parent candidates = (Image - .select(Image.id, Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum) - .join(ImageStorage) - .where(Image.security_indexed_engine < version, Image.parent >> None, ImageStorage.uploading == False, ImageStorage.checksum != '') - .limit(BATCH_SIZE*10) - .alias('candidates')) + .select(Image.id, Image.docker_image_id, ImageStorage.uuid) + .join(ImageStorage) + .where(Image.security_indexed_engine < version, + Image.parent >> None, + ImageStorage.uploading == False) + .limit(BATCH_SIZE*10) + .alias('candidates')) images = (Image - .select(candidates.c.id, candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum) - .distinct() - .from_(candidates) - .order_by(db_random_func()) - .tuples() - .limit(BATCH_SIZE)) + .select(candidates.c.id, candidates.c.docker_image_id, candidates.c.uuid) + .from_(candidates) + .order_by(db_random_func()) + .tuples() + .limit(BATCH_SIZE)) for image in images: - rimages.append({'image_id': image[0], 'docker_image_id': image[1], 'storage_uuid': image[2], 'storage_checksum': image[3], 'parent_docker_image_id': None, 'parent_storage_uuid': None}) + rimages.append({'image_id': image[0], + 'docker_image_id': image[1], + 'storage_uuid': image[2], + 'parent_docker_image_id': None, + 'parent_storage_uuid': None}) # With analyzed parent candidates = (Image - .select(Image.id, Image.docker_image_id, ImageStorage.uuid, ImageStorage.checksum, Parent.docker_image_id.alias('parent_docker_image_id'), ParentImageStorage.uuid.alias('parent_storage_uuid')) - .join(Parent, on=(Image.parent == Parent.id)) - .join(ParentImageStorage, on=(ParentImageStorage.id == Parent.storage)) - .switch(Image) - .join(ImageStorage) - .where(Image.security_indexed_engine < version, 
Parent.security_indexed == True, Parent.security_indexed_engine >= version, ImageStorage.uploading == False, ImageStorage.checksum != '') - .limit(BATCH_SIZE*10) - .alias('candidates')) + .select(Image.id, + Image.docker_image_id, + ImageStorage.uuid, + Parent.docker_image_id.alias('parent_docker_image_id'), + ParentImageStorage.uuid.alias('parent_storage_uuid')) + .join(Parent, on=(Image.parent == Parent.id)) + .join(ParentImageStorage, on=(ParentImageStorage.id == Parent.storage)) + .switch(Image) + .join(ImageStorage) + .where(Image.security_indexed_engine < version, + Parent.security_indexed == True, + Parent.security_indexed_engine >= version, + ImageStorage.uploading == False) + .limit(BATCH_SIZE*10) + .alias('candidates')) images = (Image - .select(candidates.c.id, candidates.c.docker_image_id, candidates.c.uuid, candidates.c.checksum, candidates.c.parent_docker_image_id, candidates.c.parent_storage_uuid) - .distinct() - .from_(candidates) - .order_by(db_random_func()) - .tuples() - .limit(BATCH_SIZE)) + .select(candidates.c.id, + candidates.c.docker_image_id, + candidates.c.uuid, + candidates.c.parent_docker_image_id, + candidates.c.parent_storage_uuid) + .from_(candidates) + .order_by(db_random_func()) + .tuples() + .limit(BATCH_SIZE)) for image in images: - rimages.append({'image_id': image[0], 'docker_image_id': image[1], 'storage_uuid': image[2], 'storage_checksum': image[3], 'parent_docker_image_id': image[4], 'parent_storage_uuid': image[5]}) + rimages.append({'image_id': image[0], + 'docker_image_id': image[1], + 'storage_uuid': image[2], + 'parent_docker_image_id': image[3], + 'parent_storage_uuid': image[4]}) # Re-shuffle, otherwise the images without parents will always be on the top random.shuffle(rimages) @@ -78,11 +97,11 @@ def _get_image_to_export(version): def _get_storage_locations(uuid): query = (ImageStoragePlacement - .select() - .join(ImageStorageLocation) - .switch(ImageStoragePlacement) - .join(ImageStorage, JOIN_LEFT_OUTER) - 
.where(ImageStorage.uuid == uuid)) + .select() + .join(ImageStorageLocation) + .switch(ImageStoragePlacement) + .join(ImageStorage, JOIN_LEFT_OUTER) + .where(ImageStorage.uuid == uuid)) locations = list() for location in query: @@ -92,9 +111,10 @@ def _get_storage_locations(uuid): def _update_image(image, indexed, version): query = (Image - .select() - .join(ImageStorage) - .where(Image.docker_image_id == image['docker_image_id'], ImageStorage.uuid == image['storage_uuid'])) + .select() + .join(ImageStorage) + .where(Image.docker_image_id == image['docker_image_id'], + ImageStorage.uuid == image['storage_uuid'])) updated_images = list() for row in query: @@ -115,18 +135,20 @@ class SecurityWorker(Worker): # Load configuration config = app.config.get('SECURITY_SCANNER') - if not config or not 'ENDPOINT' in config or not 'ENGINE_VERSION_TARGET' in config or not 'DISTRIBUTED_STORAGE_PREFERENCE' in app.config: + if (not config + or not 'ENDPOINT' in config or not 'ENGINE_VERSION_TARGET' in config + or not 'DISTRIBUTED_STORAGE_PREFERENCE' in app.config): logger.exception('No configuration found for the security worker') return False self._api = config['ENDPOINT'] self._target_version = config['ENGINE_VERSION_TARGET'] self._default_storage_locations = app.config['DISTRIBUTED_STORAGE_PREFERENCE'] - self._ca_verification = False + self._ca = False self._cert = None if 'CA_CERTIFICATE_FILENAME' in config: - self._ca_verification = os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['CA_CERTIFICATE_FILENAME']) - if not os.path.isfile(self._ca_verification): + self._ca = os.path.join(OVERRIDE_CONFIG_DIRECTORY, config['CA_CERTIFICATE_FILENAME']) + if not os.path.isfile(self._ca): logger.exception('Could not find configured CA file') return False if 'PRIVATE_KEY_FILENAME' in config and 'PUBLIC_KEY_FILENAME' in config: @@ -168,9 +190,8 @@ class SecurityWorker(Worker): locations = _get_storage_locations(img['storage_uuid']) if not storage.exists(locations, path): - 
logger.warning('Could not find a valid location to download layer %s', img['docker_image_id']+'.'+img['storage_uuid']) - # Mark as analyzed because that error is most likely to occur during the pre-process, with the database copy - # when images are actually removed on the real database (and therefore in S3) + logger.warning('Could not find a valid location to download layer %s', + img['docker_image_id']+'.'+img['storage_uuid']) _update_image(img, False, self._target_version) continue @@ -182,10 +203,9 @@ class SecurityWorker(Worker): # Forge request request = { 'ID': img['docker_image_id']+'.'+img['storage_uuid'], - 'TarSum': img['storage_checksum'], 'Path': uri } - if img['parent_docker_image_id'] is not None: + if img['parent_docker_image_id'] is not None and img['parent_storage_uuid'] is not None: request['ParentID'] = img['parent_docker_image_id']+'.'+img['parent_storage_uuid'] # Post request @@ -193,28 +213,34 @@ class SecurityWorker(Worker): logger.info('Analyzing %s', request['ID']) # Using invalid certificates doesn't return proper errors because of # https://github.com/shazow/urllib3/issues/556 - httpResponse = requests.post(self._api + API_METHOD_INSERT, json=request, cert=self._cert, verify=self._ca_verification) + httpResponse = requests.post(self._api + API_METHOD_INSERT, json=request, + cert=self._cert, verify=self._ca) except: - logger.exception('An exception occurred when analyzing layer ID %s : %s', request['ID'], exc_info()[0]) + logger.exception('An exception occurred when analyzing layer ID %s : %s', + request['ID'], exc_info()[0]) return try: jsonResponse = httpResponse.json() except: - logger.exception('An exception occurred when analyzing layer ID %s : the response is not valid JSON (%s)', request['ID'], httpResponse.text) + logger.exception('An exception occurred when analyzing layer ID %s : the response is ' + 'not valid JSON (%s)', request['ID'], httpResponse.text) return if httpResponse.status_code != 201: if 'Message' in jsonResponse: if 
'OS and/or package manager are not supported' in jsonResponse['Message']: # The current engine could not index this layer - logger.warning('A warning event occurred when analyzing layer ID %s : %s', request['ID'], jsonResponse['Message']) + logger.warning('A warning event occurred when analyzing layer ID %s : %s', + request['ID'], jsonResponse['Message']) # Hopefully, there is no version lower than the target one running _update_image(img, False, self._target_version) else: - logger.exception('An exception occurred when analyzing layer ID %s : %d %s', request['ID'], httpResponse.status_code, jsonResponse['Message']) + logger.exception('An exception occurred when analyzing layer ID %s : %d %s', + request['ID'], httpResponse.status_code, jsonResponse['Message']) return else: - logger.exception('An exception occurred when analyzing layer ID %s : %d', request['ID'], httpResponse.status_code) + logger.exception('An exception occurred when analyzing layer ID %s : %d', + request['ID'], httpResponse.status_code) return # The layer has been successfully indexed