"""Worker that hands images eligible for a security scan to the layer analyzer."""

import logging.config
import time

from math import log10

import features

from app import app, secscan_api, prometheus
from workers.worker import Worker
from data.database import UseThenDisconnect
from data.model.image import (
    get_images_eligible_for_scan,
    get_image_pk_field,
    get_max_id_for_sec_scan,
    get_min_id_for_sec_scan,
)
from util.secscan.api import SecurityConfigValidator
from util.secscan.analyzer import LayerAnalyzer, PreemptedException
from util.migrate.allocator import yield_random_entries
from util.log import logfile_path
from endpoints.v2 import v2_bp


# Used when SECURITY_SCANNER_INDEXING_INTERVAL is absent from the app config
# (presumably seconds -- confirm against Worker.add_operation).
DEFAULT_INDEXING_INTERVAL = 30

logger = logging.getLogger(__name__)

unscanned_images_gauge = prometheus.create_gauge(
    'unscanned_images',
    'Number of images that clair needs to scan.',
)
max_unscanned_images_gauge = prometheus.create_gauge(
    'max_unscanned_image_id',
    'Max ID of the unscanned images.',
)


class SecurityWorker(Worker):
    """Worker whose scheduled operation runs the layer analyzer over scan candidates."""

    def __init__(self):
        """
        Validate the security scanner configuration and schedule the indexing operation.

        If validation fails, a warning is logged and no operation is scheduled.
        """
        super(SecurityWorker, self).__init__()

        if not SecurityConfigValidator(app.config).valid():
            logger.warning('Failed to validate security scan configuration')
            return

        config = app.config
        self._target_version = config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 3)
        self._analyzer = LayerAnalyzer(config, secscan_api)
        self._min_id = None

        self.add_operation(
            self._index_images,
            config.get('SECURITY_SCANNER_INDEXING_INTERVAL', DEFAULT_INDEXING_INTERVAL),
        )

    def _index_images(self):
        """
        Pass every candidate produced by the random-entry allocator to the analyzer.

        Returns early when there is no maximum ID to scan up to, or when min_id is unknown
        or already greater than that maximum. After the candidates have been consumed,
        min_id is advanced to one past the maximum ID seen by this run.
        """
        # ID of the last image we can analyze; None when the database holds no images.
        max_id = get_max_id_for_sec_scan()
        if max_id is None:
            return

        min_id = self.min_id
        if min_id is None or min_id > max_id:
            logger.info('Could not find any available images for scanning.')
            return

        max_unscanned_images_gauge.Set(max_id)

        # 4^log10(total) gives us a scalable batch size into the billions.
        id_span = max(10, max_id - min_id)
        batch_size = int(4 ** log10(id_span))

        def eligible_images_query():
            return get_images_eligible_for_scan(self._target_version)

        with UseThenDisconnect(app.config):
            candidates = yield_random_entries(
                eligible_images_query,
                get_image_pk_field(),
                batch_size,
                max_id,
                min_id,
            )
            for image, abort_signal, remaining in candidates:
                try:
                    self._analyzer.analyze_recursively(image)
                except PreemptedException:
                    logger.info('Another worker pre-empted us for layer: %s', image.id)
                    # NOTE(review): presumably makes the allocator abandon the current
                    # batch -- confirm against yield_random_entries.
                    abort_signal.set()

                unscanned_images_gauge.Set(remaining)

        # Everything up to max_id has been handed to the analyzer, so the next run of the
        # worker should begin at the image that follows it.
        self.min_id = max_id + 1

    @property
    def min_id(self):
        """
        ID of the first image to be analyzed, resolved on demand.

        A value that is already known is returned as is. Otherwise the config key
        SECURITY_SCANNER_INDEXING_MIN_ID is consulted first and the database second; the
        result is remembered and returned. It is None when neither source yields an ID,
        i.e. there are no images available for scanning.
        """
        if self._min_id is not None:
            return self._min_id

        resolved = app.config.get('SECURITY_SCANNER_INDEXING_MIN_ID')
        if resolved is None:
            resolved = get_min_id_for_sec_scan(self._target_version)

        self._min_id = resolved
        return resolved

    @min_id.setter
    def min_id(self, new_min_id):
        """Record new_min_id as the ID from which the next indexing run starts."""
        self._min_id = new_min_id


if __name__ == '__main__':
    app.register_blueprint(v2_bp, url_prefix='/v2')

    if not features.SECURITY_SCANNER:
        logger.debug('Security scanner disabled; skipping SecurityWorker')
        # Idle forever instead of exiting when the feature is turned off.
        while True:
            time.sleep(100000)

    logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
    SecurityWorker().start()