quay/workers/securityworker.py

import logging.config
import time

from math import log10

import features

from app import app, secscan_api, prometheus
from workers.worker import Worker
from data.database import UseThenDisconnect
from data.model.image import (get_images_eligible_for_scan, get_image_pk_field,
                              get_max_id_for_sec_scan, get_min_id_for_sec_scan)
from util.secscan.api import SecurityConfigValidator
from util.secscan.analyzer import LayerAnalyzer, PreemptedException
from util.migrate.allocator import yield_random_entries
from util.log import logfile_path
from endpoints.v2 import v2_bp

DEFAULT_INDEXING_INTERVAL = 30

logger = logging.getLogger(__name__)

unscanned_images_gauge = prometheus.create_gauge('unscanned_images',
                                                 'Number of images that clair needs to scan.')
max_unscanned_images_gauge = prometheus.create_gauge('max_unscanned_image_id',
                                                     'Max ID of the unscanned images.')

class SecurityWorker(Worker):
  def __init__(self):
    super(SecurityWorker, self).__init__()
    validator = SecurityConfigValidator(app.config)
    if not validator.valid():
      logger.warning('Failed to validate security scan configuration')
      return

    self._target_version = app.config.get('SECURITY_SCANNER_ENGINE_VERSION_TARGET', 3)
    self._analyzer = LayerAnalyzer(app.config, secscan_api)
    self._min_id = None

    interval = app.config.get('SECURITY_SCANNER_INDEXING_INTERVAL', DEFAULT_INDEXING_INTERVAL)
    self.add_operation(self._index_images, interval)

  def _index_images(self):
    def batch_query():
      return get_images_eligible_for_scan(self._target_version)

    # Get the ID of the last image we can analyze. Will be None if there are no images in the
    # database.
    max_id = get_max_id_for_sec_scan()
    if max_id is None:
      return

    if self.min_id is None or self.min_id > max_id:
      logger.info('Could not find any available images for scanning.')
      return

    max_unscanned_images_gauge.Set(max_id)

    # 4^log10(total) gives us a scalable batch size into the billions.
    batch_size = int(4 ** log10(max(10, max_id - self.min_id)))
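    # Worked example of the formula above (illustrative numbers, not from the original
    # source): with roughly 1,000 images left to cover the batch size is
    # 4**log10(1000) = 4**3 = 64; with 1,000,000 it is 4**6 = 4,096; and with
    # 1,000,000,000 it is 4**9 = 262,144, so batches grow slowly relative to table size.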

    with UseThenDisconnect(app.config):
      to_scan_generator = yield_random_entries(
        batch_query,
        get_image_pk_field(),
        batch_size,
        max_id,
        self.min_id,
      )
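      # Each item from yield_random_entries is a (candidate, abort_event, num_remaining)
      # tuple: the image to scan, an event we set if another worker beat us to it, and a
      # count of images still awaiting a scan (description inferred from how the values
      # are used below).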
      for candidate, abt, num_remaining in to_scan_generator:
        try:
          self._analyzer.analyze_recursively(candidate)
        except PreemptedException:
          logger.info('Another worker pre-empted us for layer: %s', candidate.id)
          abt.set()

        unscanned_images_gauge.Set(num_remaining)

      # If we reach this point, we have analyzed every image up to max_id; the next time the
      # worker runs, we want to start from the next image.
      self.min_id = max_id + 1

  @property
  def min_id(self):
    """ If it hasn't already been determined, finds the ID of the first image to be analyzed.
        First checks the config, then the database, and returns None if there are no images
        available for scanning.
    """
    if self._min_id is None:
      self._min_id = app.config.get('SECURITY_SCANNER_INDEXING_MIN_ID')

    if self._min_id is None:
      self._min_id = get_min_id_for_sec_scan(self._target_version)

    return self._min_id

  @min_id.setter
  def min_id(self, new_min_id):
    self._min_id = new_min_id

if __name__ == '__main__':
  app.register_blueprint(v2_bp, url_prefix='/v2')

  if not features.SECURITY_SCANNER:
    logger.debug('Security scanner disabled; skipping SecurityWorker')
    while True:
      time.sleep(100000)

  logging.config.fileConfig(logfile_path(debug=True), disable_existing_loggers=False)
  worker = SecurityWorker()
  worker.start()